1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Forward definitions of types. */
83 typedef struct minipool_node Mnode;
84 typedef struct minipool_fixup Mfix;
85
86 void (*arm_lang_output_object_attributes_hook)(void);
87
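/* Holds the (up to four) immediate values that optimal_immediate_sequence
   and optimal_immediate_sequence_1 below return when splitting a constant
   into a short instruction sequence.  */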
88 struct four_ints
89 {
90 int i[4];
91 };
92
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx);
95 static int arm_needs_doubleword_align (machine_mode, const_tree);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets *arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
101 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap);
104 static int arm_address_register_rtx_p (rtx, int);
105 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
106 static bool is_called_in_ARM_mode (tree);
107 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
108 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
109 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
110 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
111 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
112 inline static int thumb1_index_register_rtx_p (rtx, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx, int);
118 static void arm_print_operand_address (FILE *, machine_mode, rtx);
119 static bool arm_print_operand_punct_valid_p (unsigned char code);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
207
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 #ifndef ARM_PE
213 static void arm_encode_section_info (tree, rtx, int);
214 #endif
215
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree, tree *);
219
220 static void arm_setup_incoming_varargs (cumulative_args_t,
221 const function_arg_info &, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t,
223 const function_arg_info &);
224 static bool arm_promote_prototypes (const_tree);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree);
228 static bool arm_must_pass_in_stack (const function_arg_info &);
229 static bool arm_return_in_memory (const_tree, const_tree);
230 #if ARM_UNWIND_INFO
231 static void arm_unwind_emit (FILE *, rtx_insn *);
232 static bool arm_output_ttype (rtx);
233 static void arm_asm_emit_except_personality (rtx);
234 #endif
235 static void arm_asm_init_sections (void);
236 static rtx arm_dwarf_register_span (rtx);
237
238 static tree arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree arm_get_cookie_size (tree);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree, rtx);
250 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
253 struct cl_target_option *);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option *);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static void arm_relayout_function (tree);
259 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
260 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
261 static bool arm_sched_can_speculate_insn (rtx_insn *);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn *);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
269 static bool arm_output_addr_const_extra (FILE *, rtx);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree);
272 static tree arm_promoted_type (const_tree t);
273 static bool arm_scalar_mode_supported_p (scalar_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (scalar_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
295 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
301
302 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
303 rtx, const vec_perm_indices &);
304
305 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
308 tree vectype,
309 int misalign ATTRIBUTE_UNUSED);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 vec<machine_mode> &,
330 vec<const char *> &, vec<rtx> &,
331 vec<rtx> &, HARD_REG_SET &, location_t);
332 static const char *arm_identify_fpu_from_isa (sbitmap);
333 \f
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
341 call. */
342 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Whereas these functions are always known to reside within the 26 bit
344 addressing range. */
345 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
348 NULL },
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
351 NULL },
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute, NULL },
356 #ifdef ARM_PE
357 /* ARM/PE has three new attributes:
358 interfacearm - ?
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
361
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
364 multiple times.
365 */
366 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
367 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute, NULL },
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
372 NULL },
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute, NULL },
377 #endif
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry, NULL },
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call, NULL },
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
384 };
385
386 static const scoped_attribute_specs arm_gnu_attribute_table =
387 {
388 "gnu", { arm_gnu_attributes }
389 };
390
391 static const scoped_attribute_specs *const arm_attribute_table[] =
392 {
393 &arm_gnu_attribute_table
394 };
395 \f
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
400 #endif
401
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
404
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
407
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
410
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
418
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
423
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
430
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
433
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
436
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
439
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
442
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
445
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
448
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
451
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
454
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
457
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
460
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
463
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
466
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
469
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
472
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
475
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
478
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
481
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
484
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
487
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #ifdef ARM_PE
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
494 #else
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
496 #endif
497
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
500
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
503
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
506
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
509
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
512
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
515
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
518
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
521
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
526
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
533
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
545
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
548
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
555
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
558
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
575
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
578
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
581
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
588
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
591
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
594
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
597
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
600
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
603
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
606
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
609
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
612
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
615
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
618
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
621
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
625
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
628
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
631
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
634
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
637
638 #if ARM_UNWIND_INFO
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
641
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
645
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
648
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
651
652 #endif /* ARM_UNWIND_INFO */
653
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
656
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
659
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
662
663 #ifdef HAVE_AS_TLS
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
666 #endif
667
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
670
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
673
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
676
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
679
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
685
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
688
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
691
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
695
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
699
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
702
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
705
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
708
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
711
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
714
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
721
722 #ifdef HAVE_AS_TLS
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
725 #endif
726
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
729
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
732
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
735
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
738
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
741
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
744
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
747
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
750
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
753
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
756
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
759
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
763
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
767
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
771
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
774
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
778
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
782
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
785
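/* Maximum number of instructions to place in an IT block.  ARMv8
   deprecates IT blocks containing more than one instruction, so with
   -mrestrict-it only a single instruction is allowed; otherwise up to
   four.  */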
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
788
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
791
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
794
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
797
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
800
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
803
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
806
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
809
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
812
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
815
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
820
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
823
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
828
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
831
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
834
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
837
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
840
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
843
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
846
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
849 \f
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack;
852 static char * minipool_startobj;
853
854 /* The maximum number of insns skipped over a branch that will be
855 conditionalised if possible. */
856 static int max_insns_skipped = 5;
857
858 /* True if we are currently building a constant table. */
859 int making_const_table;
860
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune = TARGET_CPU_arm_none;
863
864 /* The current tuning set. */
865 const struct tune_params *current_tune;
866
867 /* Which floating point hardware to schedule for. */
868 int arm_fpu_attr;
869
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label[14];
872 static int thumb_call_reg_needed;
873
874 /* The bits in this mask specify which instruction scheduling options should
875 be used. */
876 unsigned int tune_flags = 0;
877
878 /* The highest ARM architecture version supported by the
879 target. */
880 enum base_architecture arm_base_arch = BASE_ARCH_0;
881
882 /* Active target architecture and tuning. */
883
884 struct arm_build_target arm_active_target;
885
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
888
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
890 int arm_arch4 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
893 int arm_arch4t = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
896 int arm_arch5t = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
899 int arm_arch5te = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
902 int arm_arch6 = 0;
903
904 /* Nonzero if this chip supports the ARM 6K extensions. */
905 int arm_arch6k = 0;
906
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
908 int arm_arch6kz = 0;
909
910 /* Nonzero if instructions present in ARMv6-M can be used. */
911 int arm_arch6m = 0;
912
913 /* Nonzero if this chip supports the ARM 7 extensions. */
914 int arm_arch7 = 0;
915
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae = 0;
918
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm = 0;
921
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
923 int arm_arch7em = 0;
924
925 /* Nonzero if instructions present in ARMv8 can be used. */
926 int arm_arch8 = 0;
927
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
929 int arm_arch8_1 = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
932 int arm_arch8_2 = 0;
933
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
935 int arm_arch8_3 = 0;
936
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
938 int arm_arch8_4 = 0;
939
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
941 extensions. */
942 int arm_arch8m_main = 0;
943
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
945 extensions. */
946 int arm_arch8_1m_main = 0;
947
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
949 Architecture 8.2. */
950 int arm_fp16_inst = 0;
951
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched = 0;
954
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm = 0;
957
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt = 0;
960
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2 = 0;
963
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale = 0;
966
967 /* Nonzero if tuning for XScale. */
968 int arm_tune_xscale = 0;
969
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf = 0;
973
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9 = 0;
976
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
978 preprocessor.
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork = 0;
983
984 /* Nonzero if chip supports Thumb 1. */
985 int arm_arch_thumb1;
986
987 /* Nonzero if chip supports Thumb 2. */
988 int arm_arch_thumb2;
989
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv;
992 int arm_arch_thumb_hwdiv;
993
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce;
996
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool = false;
999
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register = INVALID_REGNUM;
1002
1003 enum arm_pcs arm_pcs_default;
1004
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc;
1009
1010 rtx arm_target_insn;
1011 int arm_target_label;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count = 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask = 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen = 0;
1019
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc = 0;
1022
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod = 0;
1025
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse = 0;
1028
1029 /* Nonzero if the core has a very small, high-latency multiply unit. */
1030 int arm_m_profile_small_mul = 0;
1031
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm = 0;
1034
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16 = 0;
1037
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde = 0;
1040 int arm_arch_cde_coproc = 0;
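/* Bit mask for each of the eight possible CDE coprocessors, indexed by
   coprocessor number.  */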
1041 const int arm_arch_cde_coproc_bits[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1043 };
1044
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes[] =
1047 {
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1050 };
1051
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence[] =
1054 {
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1056 };
1057
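/* Printable names of the FP system registers, built by stringizing each
   entry of FP_SYSREGS via the temporary DEF_FP_SYSREG X-macro.  */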
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1060 FP_SYSREGS
1061 };
1062 #undef DEF_FP_SYSREG
1063
1064 #define ARM_LSL_NAME "lsl"
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1066
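/* Mask of the low registers (r0-r7) that remain available as work
   registers in Thumb-2 code: the Thumb hard frame pointer, SP, PC and,
   when one is in use, the PIC register are excluded.  */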
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1073 : 0)))
1074 \f
1075 /* Initialization code. */
1076
1077 struct cpu_tune
1078 {
1079 enum processor_type scheduler;
1080 unsigned int tune_flags;
1081 const struct tune_params *tune;
1082 };
1083
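/* Initializers for the prefetch-related tuning parameters: the number of
   prefetch slots, the L1 cache size and the L1 cache line size.
   ARM_PREFETCH_NOT_BENEFICIAL requests no prefetch slots and leaves the
   cache parameters unspecified (-1).  */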
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1086 { \
1087 num_slots, \
1088 l1_size, \
1089 l1_line_size \
1090 }
1091
1092 /* arm generic vectorizer costs. */
1093 static const
1094 struct cpu_vec_costs arm_default_vec_cost = {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h. */
1110 #include "aarch-cost-tables.h"
1111
1112
1113
1114 const struct cpu_cost_table cortexa9_extra_costs =
1115 {
1116 /* ALU */
1117 {
1118 0, /* arith. */
1119 0, /* logical. */
1120 0, /* shift. */
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1124 0, /* log_shift. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1134 },
1135 {
1136 /* MULT SImode */
1137 {
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1144 },
1145 /* MULT DImode */
1146 {
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (4), /* extend_add. */
1152 0 /* idiv (N/A). */
1153 }
1154 },
1155 /* LD/ST */
1156 {
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1176 },
1177 {
1178 /* FP SFmode */
1179 {
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 },
1194 /* FP DFmode */
1195 {
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1209 }
1210 },
1211 /* Vector */
1212 {
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1218 }
1219 };
1220
1221 const struct cpu_cost_table cortexa8_extra_costs =
1222 {
1223 /* ALU */
1224 {
1225 0, /* arith. */
1226 0, /* logical. */
1227 COSTS_N_INSNS (1), /* shift. */
1228 0, /* shift_reg. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1233 0, /* extend. */
1234 0, /* extend_arith. */
1235 0, /* bfi. */
1236 0, /* bfx. */
1237 0, /* clz. */
1238 0, /* rev. */
1239 0, /* non_exec. */
1240 true /* non_exec_costs_exec. */
1241 },
1242 {
1243 /* MULT SImode */
1244 {
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1251 },
1252 /* MULT DImode */
1253 {
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1257 0, /* add (N/A). */
1258 COSTS_N_INSNS (2), /* extend_add. */
1259 0 /* idiv (N/A). */
1260 }
1261 },
1262 /* LD/ST */
1263 {
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1283 },
1284 {
1285 /* FP SFmode */
1286 {
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 },
1301 /* FP DFmode */
1302 {
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1316 }
1317 },
1318 /* Vector */
1319 {
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1325 }
1326 };
1327
1328 const struct cpu_cost_table cortexa5_extra_costs =
1329 {
1330 /* ALU */
1331 {
1332 0, /* arith. */
1333 0, /* logical. */
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1346 0, /* non_exec. */
1347 true /* non_exec_costs_exec. */
1348 },
1349
1350 {
1351 /* MULT SImode */
1352 {
1353 0, /* simple. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1359 },
1360 /* MULT DImode */
1361 {
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1365 0, /* add. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1367 0 /* idiv (N/A). */
1368 }
1369 },
1370 /* LD/ST */
1371 {
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1391 },
1392 {
1393 /* FP SFmode */
1394 {
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 },
1409 /* FP DFmode */
1410 {
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1424 }
1425 },
1426 /* Vector */
1427 {
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1433 }
1434 };
1435
1436
1437 const struct cpu_cost_table cortexa7_extra_costs =
1438 {
1439 /* ALU */
1440 {
1441 0, /* arith. */
1442 0, /* logical. */
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1455 0, /* non_exec. */
1456 true /* non_exec_costs_exec. */
1457 },
1458
1459 {
1460 /* MULT SImode */
1461 {
1462 0, /* simple. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1468 },
1469 /* MULT DImode */
1470 {
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1474 0, /* add. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1476 0 /* idiv (N/A). */
1477 }
1478 },
1479 /* LD/ST */
1480 {
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1500 },
1501 {
1502 /* FP SFmode */
1503 {
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 },
1518 /* FP DFmode */
1519 {
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1533 }
1534 },
1535 /* Vector */
1536 {
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1542 }
1543 };
1544
1545 const struct cpu_cost_table cortexa12_extra_costs =
1546 {
1547 /* ALU */
1548 {
1549 0, /* arith. */
1550 0, /* logical. */
1551 0, /* shift. */
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1557 0, /* extend. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1559 0, /* bfi. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1563 0, /* non_exec. */
1564 true /* non_exec_costs_exec. */
1565 },
1566 /* MULT SImode */
1567 {
1568 {
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1575 },
1576 /* MULT DImode */
1577 {
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1581 0, /* add (N/A). */
1582 COSTS_N_INSNS (3), /* extend_add. */
1583 0 /* idiv (N/A). */
1584 }
1585 },
1586 /* LD/ST */
1587 {
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1597 0, /* store. */
1598 0, /* strd. */
1599 0, /* stm_1st. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1607 },
1608 {
1609 /* FP SFmode */
1610 {
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 },
1625 /* FP DFmode */
1626 {
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1640 }
1641 },
1642 /* Vector */
1643 {
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1649 }
1650 };
1651
1652 const struct cpu_cost_table cortexa15_extra_costs =
1653 {
1654 /* ALU */
1655 {
1656 0, /* arith. */
1657 0, /* logical. */
1658 0, /* shift. */
1659 0, /* shift_reg. */
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 0, /* extend. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1667 0, /* bfx. */
1668 0, /* clz. */
1669 0, /* rev. */
1670 0, /* non_exec. */
1671 true /* non_exec_costs_exec. */
1672 },
1673 /* MULT SImode */
1674 {
1675 {
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1682 },
1683 /* MULT DImode */
1684 {
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1688 0, /* add (N/A). */
1689 COSTS_N_INSNS (3), /* extend_add. */
1690 0 /* idiv (N/A). */
1691 }
1692 },
1693 /* LD/ST */
1694 {
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1704 0, /* store. */
1705 0, /* strd. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1709 0, /* storef. */
1710 0, /* stored. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1714 },
1715 {
1716 /* FP SFmode */
1717 {
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 },
1732 /* FP DFmode */
1733 {
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1747 }
1748 },
1749 /* Vector */
1750 {
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1756 }
1757 };
1758
1759 const struct cpu_cost_table v7m_extra_costs =
1760 {
1761 /* ALU */
1762 {
1763 0, /* arith. */
1764 0, /* logical. */
1765 0, /* shift. */
1766 0, /* shift_reg. */
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1769 0, /* log_shift. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1771 0, /* extend. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1773 0, /* bfi. */
1774 0, /* bfx. */
1775 0, /* clz. */
1776 0, /* rev. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1779 },
1780 {
1781 /* MULT SImode */
1782 {
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1789 },
1790 /* MULT DImode */
1791 {
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1795 0, /* add (N/A). */
1796 COSTS_N_INSNS (3), /* extend_add. */
1797 0 /* idiv (N/A). */
1798 }
1799 },
1800 /* LD/ST */
1801 {
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1821 },
1822 {
1823 /* FP SFmode */
1824 {
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 },
1839 /* FP DFmode */
1840 {
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1846 0, /* fpconst. */
1847 0, /* neg. */
1848 0, /* compare. */
1849 0, /* widen. */
1850 0, /* narrow. */
1851 0, /* toint. */
1852 0, /* fromint. */
1853 0 /* roundint. */
1854 }
1855 },
1856 /* Vector */
1857 {
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1863 }
1864 };
1865
1866 const struct addr_mode_cost_table generic_addr_mode_costs =
1867 {
1868 /* int. */
1869 {
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1873 },
1874 /* float. */
1875 {
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1879 },
1880 /* vector. */
1881 {
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1885 }
1886 };
1887
1888 const struct tune_params arm_slowmul_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE,
1906 tune_params::FUSE_NOTHING,
1907 tune_params::SCHED_AUTOPREF_OFF
1908 };
1909
1910 const struct tune_params arm_fastmul_tune =
1911 {
1912 &generic_extra_costs, /* Insn extra costs. */
1913 &generic_addr_mode_costs, /* Addressing mode costs. */
1914 NULL, /* Sched adj cost. */
1915 arm_default_branch_cost,
1916 &arm_default_vec_cost,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL,
1922 tune_params::PREF_CONST_POOL_TRUE,
1923 tune_params::PREF_LDRD_FALSE,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1934
1935 const struct tune_params arm_strongarm_tune =
1936 {
1937 &generic_extra_costs, /* Insn extra costs. */
1938 &generic_addr_mode_costs, /* Addressing mode costs. */
1939 NULL, /* Sched adj cost. */
1940 arm_default_branch_cost,
1941 &arm_default_vec_cost,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL,
1947 tune_params::PREF_CONST_POOL_TRUE,
1948 tune_params::PREF_LDRD_FALSE,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_xscale_tune =
1958 {
1959 &generic_extra_costs, /* Insn extra costs. */
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost,
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_TRUE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_9e_tune =
1980 {
1981 &generic_extra_costs, /* Insn extra costs. */
1982 &generic_addr_mode_costs, /* Addressing mode costs. */
1983 NULL, /* Sched adj cost. */
1984 arm_default_branch_cost,
1985 &arm_default_vec_cost,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL,
1991 tune_params::PREF_CONST_POOL_TRUE,
1992 tune_params::PREF_LDRD_FALSE,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_OFF
1999 };
2000
2001 const struct tune_params arm_marvell_pj4_tune =
2002 {
2003 &generic_extra_costs, /* Insn extra costs. */
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_TRUE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_v6t2_tune =
2024 {
2025 &generic_extra_costs, /* Insn extra costs. */
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune =
2048 {
2049 &generic_extra_costs,
2050 &generic_addr_mode_costs, /* Addressing mode costs. */
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_FALSE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE,
2065 tune_params::FUSE_NOTHING,
2066 tune_params::SCHED_AUTOPREF_OFF
2067 };
2068
2069 const struct tune_params arm_cortex_a8_tune =
2070 {
2071 &cortexa8_extra_costs,
2072 &generic_addr_mode_costs, /* Addressing mode costs. */
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_FALSE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 tune_params::FUSE_NOTHING,
2088 tune_params::SCHED_AUTOPREF_OFF
2089 };
2090
2091 const struct tune_params arm_cortex_a7_tune =
2092 {
2093 &cortexa7_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE,
2109 tune_params::FUSE_NOTHING,
2110 tune_params::SCHED_AUTOPREF_OFF
2111 };
2112
2113 const struct tune_params arm_cortex_a15_tune =
2114 {
2115 &cortexa15_extra_costs,
2116 &generic_addr_mode_costs, /* Addressing mode costs. */
2117 NULL, /* Sched adj cost. */
2118 arm_default_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_TRUE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE,
2131 tune_params::FUSE_NOTHING,
2132 tune_params::SCHED_AUTOPREF_FULL
2133 };
2134
2135 const struct tune_params arm_cortex_a35_tune =
2136 {
2137 &cortexa53_extra_costs,
2138 &generic_addr_mode_costs, /* Addressing mode costs. */
2139 NULL, /* Sched adj cost. */
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL,
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2154 tune_params::SCHED_AUTOPREF_OFF
2155 };
2156
2157 const struct tune_params arm_cortex_a53_tune =
2158 {
2159 &cortexa53_extra_costs,
2160 &generic_addr_mode_costs, /* Addressing mode costs. */
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2176 tune_params::SCHED_AUTOPREF_OFF
2177 };
2178
2179 const struct tune_params arm_cortex_a57_tune =
2180 {
2181 &cortexa57_extra_costs,
2182 &generic_addr_mode_costs, /* Addressing mode costs. */
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2198 tune_params::SCHED_AUTOPREF_FULL
2199 };
2200
2201 const struct tune_params arm_exynosm1_tune =
2202 {
2203 &exynosm1_extra_costs,
2204 &generic_addr_mode_costs, /* Addressing mode costs. */
2205 NULL, /* Sched adj cost. */
2206 arm_default_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_FALSE,
2214 tune_params::PREF_LDRD_TRUE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE,
2219 tune_params::FUSE_NOTHING,
2220 tune_params::SCHED_AUTOPREF_OFF
2221 };
2222
2223 const struct tune_params arm_xgene1_tune =
2224 {
2225 &xgene1_extra_costs,
2226 &generic_addr_mode_costs, /* Addressing mode costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_TRUE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2243 };
2244
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2247
2248 const struct tune_params arm_cortex_a5_tune =
2249 {
2250 &cortexa5_extra_costs,
2251 &generic_addr_mode_costs, /* Addressing mode costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost,
2254 &arm_default_vec_cost,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2268 };
2269
2270 const struct tune_params arm_cortex_a9_tune =
2271 {
2272 &cortexa9_extra_costs,
2273 &generic_addr_mode_costs, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_cortex_a12_tune =
2293 {
2294 &cortexa12_extra_costs,
2295 &generic_addr_mode_costs, /* Addressing mode costs. */
2296 NULL, /* Sched adj cost. */
2297 arm_default_branch_cost,
2298 &arm_default_vec_cost, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL,
2304 tune_params::PREF_CONST_POOL_FALSE,
2305 tune_params::PREF_LDRD_TRUE,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 const struct tune_params arm_cortex_a73_tune =
2315 {
2316 &cortexa57_extra_costs,
2317 &generic_addr_mode_costs, /* Addressing mode costs. */
2318 NULL, /* Sched adj cost. */
2319 arm_default_branch_cost,
2320 &arm_default_vec_cost, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL,
2326 tune_params::PREF_CONST_POOL_FALSE,
2327 tune_params::PREF_LDRD_TRUE,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2333 tune_params::SCHED_AUTOPREF_FULL
2334 };
2335
2336 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW and MOVT each take a
2337    single cycle, so a MOVW/MOVT pair costs two cycles.  An LDR from the
2338    constant pool also takes two cycles to execute, but mildly increases
2339    pipelining opportunity (consecutive loads/stores can be pipelined together,
2340    saving one cycle), and may also improve icache utilisation.  Hence we
2341    prefer the constant pool for such processors.  */
2342
2343 const struct tune_params arm_v7m_tune =
2344 {
2345 &v7m_extra_costs,
2346 &generic_addr_mode_costs, /* Addressing mode costs. */
2347 NULL, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost,
2349 &arm_default_vec_cost,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL,
2355 tune_params::PREF_CONST_POOL_TRUE,
2356 tune_params::PREF_LDRD_FALSE,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Cortex-M7 tuning. */
2366
2367 const struct tune_params arm_cortex_m7_tune =
2368 {
2369 &v7m_extra_costs,
2370 &generic_addr_mode_costs, /* Addressing mode costs. */
2371 NULL, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost,
2373 &arm_default_vec_cost,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL,
2379 tune_params::PREF_CONST_POOL_TRUE,
2380 tune_params::PREF_LDRD_FALSE,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE,
2385 tune_params::FUSE_NOTHING,
2386 tune_params::SCHED_AUTOPREF_OFF
2387 };
2388
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2391 cortex-m23. */
2392 const struct tune_params arm_v6m_tune =
2393 {
2394 &generic_extra_costs, /* Insn extra costs. */
2395 &generic_addr_mode_costs, /* Addressing mode costs. */
2396 NULL, /* Sched adj cost. */
2397 arm_default_branch_cost,
2398 &arm_default_vec_cost, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL,
2404 tune_params::PREF_CONST_POOL_FALSE,
2405 tune_params::PREF_LDRD_FALSE,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE,
2410 tune_params::FUSE_NOTHING,
2411 tune_params::SCHED_AUTOPREF_OFF
2412 };
2413
2414 const struct tune_params arm_fa726te_tune =
2415 {
2416 &generic_extra_costs, /* Insn extra costs. */
2417 &generic_addr_mode_costs, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost,
2419 arm_default_branch_cost,
2420 &arm_default_vec_cost,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL,
2426 tune_params::PREF_CONST_POOL_TRUE,
2427 tune_params::PREF_LDRD_FALSE,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE,
2432 tune_params::FUSE_NOTHING,
2433 tune_params::SCHED_AUTOPREF_OFF
2434 };
2435
2436 /* Auto-generated CPU, FPU and architecture tables. */
2437 #include "arm-cpu-data.h"
2438
2439 /* The name of the preprocessor macro to define for this architecture. PROFILE
2440    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2441 is thus chosen to be big enough to hold the longest architecture name. */
2442
2443 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
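/* For example, with -march=armv8-a the placeholder above becomes "8A" and
   the macro __ARM_ARCH_8A__ ends up being defined.  */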
2444
2445 /* Supported TLS relocations. */
2446
2447 enum tls_reloc {
2448 TLS_GD32,
2449 TLS_GD32_FDPIC,
2450 TLS_LDM32,
2451 TLS_LDM32_FDPIC,
2452 TLS_LDO32,
2453 TLS_IE32,
2454 TLS_IE32_FDPIC,
2455 TLS_LE32,
2456 TLS_DESCSEQ /* GNU scheme */
2457 };
2458
2459 /* The maximum number of insns to be used when loading a constant. */
2460 inline static int
2461 arm_constant_limit (bool size_p)
2462 {
2463 return size_p ? 1 : current_tune->constant_limit;
2464 }
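/* For example, when optimizing for size (SIZE_P true) at most one insn is
   used per constant, while e.g. arm_slowmul_tune above allows up to 3.  */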
2465
2466 /* Emit an insn that's a simple single-set. Both the operands must be known
2467 to be valid. */
2468 inline static rtx_insn *
2469 emit_set_insn (rtx x, rtx y)
2470 {
2471 return emit_insn (gen_rtx_SET (x, y));
2472 }
2473
2474 /* Return the number of bits set in VALUE. */
2475 static unsigned
2476 bit_count (unsigned long value)
2477 {
2478 unsigned long count = 0;
2479
2480 while (value)
2481 {
2482 count++;
2483 value &= value - 1; /* Clear the least-significant set bit. */
2484 }
2485
2486 return count;
2487 }
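/* For example, bit_count (0x29) returns 3: the "value &= value - 1" step
   clears exactly one set bit per iteration.  */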
2488
2489 /* Return the number of bits set in BMAP. */
2490 static unsigned
2491 bitmap_popcount (const sbitmap bmap)
2492 {
2493 unsigned int count = 0;
2494 unsigned int n = 0;
2495 sbitmap_iterator sbi;
2496
2497 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2498 count++;
2499 return count;
2500 }
2501
2502 typedef struct
2503 {
2504 machine_mode mode;
2505 const char *name;
2506 } arm_fixed_mode_set;
2507
2508 /* A small helper for setting fixed-point libfuncs.  */
2509
2510 static void
2511 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2512 const char *funcname, const char *modename,
2513 int num_suffix)
2514 {
2515 char buffer[50];
2516
2517 if (num_suffix == 0)
2518 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2519 else
2520 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2521
2522 set_optab_libfunc (optable, mode, buffer);
2523 }
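/* For example, arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add",
   "qq", 3) registers the libcall name "__gnu_addqq3".  */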
2524
2525 static void
2526 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2527 machine_mode from, const char *funcname,
2528 const char *toname, const char *fromname)
2529 {
2530 char buffer[50];
2531 const char *maybe_suffix_2 = "";
2532
2533 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2534 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2535 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2536 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2537 maybe_suffix_2 = "2";
2538
2539 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2540 maybe_suffix_2);
2541
2542 set_conv_libfunc (optable, to, from, buffer);
2543 }
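/* For example, registering the fract optab from SQmode to DQmode (both
   signed fract modes) produces the name "__gnu_fractsqdq2", while SQmode to
   SImode produces "__gnu_fractsqsi": SImode is not a fixed-point mode, so no
   "2" suffix is appended.  */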
2544
2545 static GTY(()) rtx speculation_barrier_libfunc;
2546
2547 /* Record that we have no arithmetic or comparison libfuncs for
2548 machine mode MODE. */
2549
2550 static void
2551 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2552 {
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, mode, NULL);
2555 set_optab_libfunc (sdiv_optab, mode, NULL);
2556 set_optab_libfunc (smul_optab, mode, NULL);
2557 set_optab_libfunc (neg_optab, mode, NULL);
2558 set_optab_libfunc (sub_optab, mode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, mode, NULL);
2562 set_optab_libfunc (ne_optab, mode, NULL);
2563 set_optab_libfunc (lt_optab, mode, NULL);
2564 set_optab_libfunc (le_optab, mode, NULL);
2565 set_optab_libfunc (ge_optab, mode, NULL);
2566 set_optab_libfunc (gt_optab, mode, NULL);
2567 set_optab_libfunc (unord_optab, mode, NULL);
2568 }
2569
2570 /* Set up library functions unique to ARM. */
2571 static void
2572 arm_init_libfuncs (void)
2573 {
2574 machine_mode mode_iter;
2575
2576 /* For Linux, we have access to kernel support for atomic operations. */
2577 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2578 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2579
2580 /* There are no special library functions unless we are using the
2581 ARM BPABI. */
2582 if (!TARGET_BPABI)
2583 return;
2584
2585 /* The functions below are described in Section 4 of the "Run-Time
2586 ABI for the ARM architecture", Version 1.0. */
2587
2588 /* Double-precision floating-point arithmetic. Table 2. */
2589 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2590 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2591 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2592 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2593 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2594
2595 /* Double-precision comparisons. Table 3. */
2596 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2597 set_optab_libfunc (ne_optab, DFmode, NULL);
2598 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2599 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2600 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2601 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2602 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2603
2604 /* Single-precision floating-point arithmetic. Table 4. */
2605 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2606 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2607 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2608 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2609 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2610
2611 /* Single-precision comparisons. Table 5. */
2612 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2613 set_optab_libfunc (ne_optab, SFmode, NULL);
2614 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2615 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2616 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2617 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2618 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2619
2620 /* Floating-point to integer conversions. Table 6. */
2621 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2622 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2623 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2624 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2625 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2626 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2627 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2628 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2629
2630 /* Conversions between floating types. Table 7. */
2631 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2632 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2633
2634 /* Integer to floating-point conversions. Table 8. */
2635 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2636 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2637 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2639 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2640 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2641 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2642 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2643
2644 /* Long long. Table 9. */
2645 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2646 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2647 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2648 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2649 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2650 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2651 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2652 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2653
2654 /* Integer (32/32->32) division. \S 4.3.1. */
2655 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2656 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2657
2658 /* The divmod functions are designed so that they can be used for
2659 plain division, even though they return both the quotient and the
2660 remainder. The quotient is returned in the usual location (i.e.,
2661 r0 for SImode, {r0, r1} for DImode), just as would be expected
2662 for an ordinary division routine. Because the AAPCS calling
2663 conventions specify that all of { r0, r1, r2, r3 } are
2664      call-clobbered registers, there is no need to tell the compiler
2665 explicitly that those registers are clobbered by these
2666 routines. */
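  /* For example, a DImode division "a / b" can thus be expanded as a call
     to __aeabi_ldivmod, taking the quotient from {r0, r1} and ignoring the
     remainder (which the Run-Time ABI places in {r2, r3}).  */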
2667 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2668 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2669
2670 /* For SImode division the ABI provides div-without-mod routines,
2671 which are faster. */
2672 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2673 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2674
2675 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2676 divmod libcalls instead. */
2677 set_optab_libfunc (smod_optab, DImode, NULL);
2678 set_optab_libfunc (umod_optab, DImode, NULL);
2679 set_optab_libfunc (smod_optab, SImode, NULL);
2680 set_optab_libfunc (umod_optab, SImode, NULL);
2681
2682 /* Half-precision float operations. The compiler handles all operations
2683      with NULL libfuncs by converting to SFmode.  */
2684 switch (arm_fp16_format)
2685 {
2686 case ARM_FP16_FORMAT_IEEE:
2687 case ARM_FP16_FORMAT_ALTERNATIVE:
2688
2689 /* Conversions. */
2690 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2691 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2692 ? "__gnu_f2h_ieee"
2693 : "__gnu_f2h_alternative"));
2694 set_conv_libfunc (sext_optab, SFmode, HFmode,
2695 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2696 ? "__gnu_h2f_ieee"
2697 : "__gnu_h2f_alternative"));
2698
2699 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2700 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2701 ? "__gnu_d2h_ieee"
2702 : "__gnu_d2h_alternative"));
2703
2704 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2705 break;
2706
2707 default:
2708 break;
2709 }
2710
2711 /* For all possible libcalls in BFmode, record NULL. */
2712 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2713 {
2714 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2715 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2716 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2717 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2718 }
2719 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2720
2721 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2722 {
2723 const arm_fixed_mode_set fixed_arith_modes[] =
2724 {
2725 { E_QQmode, "qq" },
2726 { E_UQQmode, "uqq" },
2727 { E_HQmode, "hq" },
2728 { E_UHQmode, "uhq" },
2729 { E_SQmode, "sq" },
2730 { E_USQmode, "usq" },
2731 { E_DQmode, "dq" },
2732 { E_UDQmode, "udq" },
2733 { E_TQmode, "tq" },
2734 { E_UTQmode, "utq" },
2735 { E_HAmode, "ha" },
2736 { E_UHAmode, "uha" },
2737 { E_SAmode, "sa" },
2738 { E_USAmode, "usa" },
2739 { E_DAmode, "da" },
2740 { E_UDAmode, "uda" },
2741 { E_TAmode, "ta" },
2742 { E_UTAmode, "uta" }
2743 };
2744 const arm_fixed_mode_set fixed_conv_modes[] =
2745 {
2746 { E_QQmode, "qq" },
2747 { E_UQQmode, "uqq" },
2748 { E_HQmode, "hq" },
2749 { E_UHQmode, "uhq" },
2750 { E_SQmode, "sq" },
2751 { E_USQmode, "usq" },
2752 { E_DQmode, "dq" },
2753 { E_UDQmode, "udq" },
2754 { E_TQmode, "tq" },
2755 { E_UTQmode, "utq" },
2756 { E_HAmode, "ha" },
2757 { E_UHAmode, "uha" },
2758 { E_SAmode, "sa" },
2759 { E_USAmode, "usa" },
2760 { E_DAmode, "da" },
2761 { E_UDAmode, "uda" },
2762 { E_TAmode, "ta" },
2763 { E_UTAmode, "uta" },
2764 { E_QImode, "qi" },
2765 { E_HImode, "hi" },
2766 { E_SImode, "si" },
2767 { E_DImode, "di" },
2768 { E_TImode, "ti" },
2769 { E_SFmode, "sf" },
2770 { E_DFmode, "df" }
2771 };
2772 unsigned int i, j;
2773
2774 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2775 {
2776 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2777 "add", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2779 "ssadd", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2781 "usadd", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2783 "sub", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2785 "sssub", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2787 "ussub", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2789 "mul", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2791 "ssmul", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2793 "usmul", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2795 "div", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2797 "udiv", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2799 "ssdiv", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2801 "usdiv", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2803 "neg", fixed_arith_modes[i].name, 2);
2804 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2805 "ssneg", fixed_arith_modes[i].name, 2);
2806 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2807 "usneg", fixed_arith_modes[i].name, 2);
2808 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2809 "ashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2811 "ashr", fixed_arith_modes[i].name, 3);
2812 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2813 "lshr", fixed_arith_modes[i].name, 3);
2814 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2815 "ssashl", fixed_arith_modes[i].name, 3);
2816 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2817 "usashl", fixed_arith_modes[i].name, 3);
2818 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2819 "cmp", fixed_arith_modes[i].name, 2);
2820 }
2821
2822 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2823 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2824 {
2825 if (i == j
2826 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2827 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2828 continue;
2829
2830 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2831 fixed_conv_modes[j].mode, "fract",
2832 fixed_conv_modes[i].name,
2833 fixed_conv_modes[j].name);
2834 arm_set_fixed_conv_libfunc (satfract_optab,
2835 fixed_conv_modes[i].mode,
2836 fixed_conv_modes[j].mode, "satfract",
2837 fixed_conv_modes[i].name,
2838 fixed_conv_modes[j].name);
2839 arm_set_fixed_conv_libfunc (fractuns_optab,
2840 fixed_conv_modes[i].mode,
2841 fixed_conv_modes[j].mode, "fractuns",
2842 fixed_conv_modes[i].name,
2843 fixed_conv_modes[j].name);
2844 arm_set_fixed_conv_libfunc (satfractuns_optab,
2845 fixed_conv_modes[i].mode,
2846 fixed_conv_modes[j].mode, "satfractuns",
2847 fixed_conv_modes[i].name,
2848 fixed_conv_modes[j].name);
2849 }
2850 }
2851
2852 if (TARGET_AAPCS_BASED)
2853 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2854
2855 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2856 }
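/* As an illustration of the AEABI tables above: on a BPABI target without
   double-precision hardware FP, a plain C division such as

       double quotient (double x, double y) { return x / y; }

   ends up as a call to __aeabi_ddiv, registered via sdiv_optab.  */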
2857
2858 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2859 static bool
2860 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2861 {
2862 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2863 tree fndecl = gimple_call_fndecl (stmt);
2864 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2865 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
2866 gimple *new_stmt = NULL;
2867 switch (code & ARM_BUILTIN_CLASS)
2868 {
2869 case ARM_BUILTIN_GENERAL:
2870 break;
2871 case ARM_BUILTIN_MVE:
2872 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2873 }
2874 if (!new_stmt)
2875 return false;
2876
2877 gsi_replace (gsi, new_stmt, true);
2878 return true;
2879 }
2880
2881 /* On AAPCS systems, this is the "struct __va_list". */
2882 static GTY(()) tree va_list_type;
2883
2884 /* Return the type to use as __builtin_va_list. */
2885 static tree
2886 arm_build_builtin_va_list (void)
2887 {
2888 tree va_list_name;
2889 tree ap_field;
2890
2891 if (!TARGET_AAPCS_BASED)
2892 return std_build_builtin_va_list ();
2893
2894 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2895 defined as:
2896
2897 struct __va_list
2898 {
2899 void *__ap;
2900 };
2901
2902 The C Library ABI further reinforces this definition in \S
2903 4.1.
2904
2905 We must follow this definition exactly. The structure tag
2906 name is visible in C++ mangled names, and thus forms a part
2907 of the ABI. The field name may be used by people who
2908 #include <stdarg.h>. */
2909 /* Create the type. */
2910 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2911 /* Give it the required name. */
2912 va_list_name = build_decl (BUILTINS_LOCATION,
2913 TYPE_DECL,
2914 get_identifier ("__va_list"),
2915 va_list_type);
2916 DECL_ARTIFICIAL (va_list_name) = 1;
2917 TYPE_NAME (va_list_type) = va_list_name;
2918 TYPE_STUB_DECL (va_list_type) = va_list_name;
2919 /* Create the __ap field. */
2920 ap_field = build_decl (BUILTINS_LOCATION,
2921 FIELD_DECL,
2922 get_identifier ("__ap"),
2923 ptr_type_node);
2924 DECL_ARTIFICIAL (ap_field) = 1;
2925 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2926 TYPE_FIELDS (va_list_type) = ap_field;
2927 /* Compute its layout. */
2928 layout_type (va_list_type);
2929
2930 return va_list_type;
2931 }
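/* Note that the resulting record contains a single pointer field, so on
   AAPCS targets sizeof (va_list) == sizeof (void *).  */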
2932
2933 /* Return an expression of type "void *" pointing to the next
2934 available argument in a variable-argument list. VALIST is the
2935 user-level va_list object, of type __builtin_va_list. */
2936 static tree
2937 arm_extract_valist_ptr (tree valist)
2938 {
2939 if (TREE_TYPE (valist) == error_mark_node)
2940 return error_mark_node;
2941
2942 /* On an AAPCS target, the pointer is stored within "struct
2943      __va_list".  */
2944 if (TARGET_AAPCS_BASED)
2945 {
2946 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2947 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2948 valist, ap_field, NULL_TREE);
2949 }
2950
2951 return valist;
2952 }
2953
2954 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2955 static void
2956 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2957 {
2958 valist = arm_extract_valist_ptr (valist);
2959 std_expand_builtin_va_start (valist, nextarg);
2960 }
2961
2962 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2963 static tree
2964 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2965 gimple_seq *post_p)
2966 {
2967 valist = arm_extract_valist_ptr (valist);
2968 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2969 }
2970
2971 /* Check any incompatible options that the user has specified. */
2972 static void
2973 arm_option_check_internal (struct gcc_options *opts)
2974 {
2975 int flags = opts->x_target_flags;
2976
2977 /* iWMMXt and NEON are incompatible. */
2978 if (TARGET_IWMMXT
2979 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2980 error ("iWMMXt and NEON are incompatible");
2981
2982 /* Make sure that the processor choice does not conflict with any of the
2983 other command line choices. */
2984 if (TARGET_ARM_P (flags)
2985 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2986 error ("target CPU does not support ARM mode");
2987
2988 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2989 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2990 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2991
2992 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2993 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2994
2995 /* If this target is normally configured to use APCS frames, warn if they
2996 are turned off and debugging is turned on. */
2997 if (TARGET_ARM_P (flags)
2998 && write_symbols != NO_DEBUG
2999 && !TARGET_APCS_FRAME
3000 && (TARGET_DEFAULT & MASK_APCS_FRAME))
3001 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3002 "debugging");
3003
3004 /* iWMMXt unsupported under Thumb mode. */
3005 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3006 error ("iWMMXt unsupported under Thumb mode");
3007
3008 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3009 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3010
3011 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3012 {
3013 error ("RTP PIC is incompatible with Thumb");
3014 flag_pic = 0;
3015 }
3016
3017 if (target_pure_code || target_slow_flash_data)
3018 {
3019 const char *flag = (target_pure_code ? "-mpure-code" :
3020 "-mslow-flash-data");
3021 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3022
3023 /* We only support -mslow-flash-data on M-profile targets with
3024 MOVT. */
3025 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3026 error ("%s only supports non-pic code on M-profile targets with the "
3027 "MOVT instruction", flag);
3028
3029 /* We only support -mpure-code on M-profile targets. */
3030 if (target_pure_code && common_unsupported_modes)
3031 error ("%s only supports non-pic code on M-profile targets", flag);
3032
3033 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3034 -mword-relocations forbids relocation of MOVT/MOVW. */
3035 if (target_word_relocations)
3036 error ("%s incompatible with %<-mword-relocations%>", flag);
3037 }
3038 }
3039
3040 /* Recompute the global settings depending on target attribute options. */
3041
3042 static void
3043 arm_option_params_internal (void)
3044 {
3045 /* If we are not using the default (ARM mode) section anchor offset
3046 ranges, then set the correct ranges now. */
3047 if (TARGET_THUMB1)
3048 {
3049 /* Thumb-1 LDR instructions cannot have negative offsets.
3050 Permissible positive offset ranges are 5-bit (for byte loads),
3051 6-bit (for halfword loads), or 7-bit (for word loads).
3052 Empirical results suggest a 7-bit anchor range gives the best
3053 overall code size. */
3054 targetm.min_anchor_offset = 0;
3055 targetm.max_anchor_offset = 127;
3056 }
3057 else if (TARGET_THUMB2)
3058 {
3059 /* The minimum is set such that the total size of the block
3060 for a particular anchor is 248 + 1 + 4095 bytes, which is
3061 divisible by eight, ensuring natural spacing of anchors. */
3062 targetm.min_anchor_offset = -248;
3063 targetm.max_anchor_offset = 4095;
3064 }
3065 else
3066 {
3067 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3068 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3069 }
3070
3071   /* With -Os, allow conditionalising up to 4 insns, whatever the tuning.  */
3072 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3073
3074 /* For THUMB2, we limit the conditional sequence to one IT block. */
3075 if (TARGET_THUMB2)
3076 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3077
3078 if (TARGET_THUMB1)
3079 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3080 else
3081 targetm.md_asm_adjust = arm_md_asm_adjust;
3082 }
3083
3084 /* True if -mflip-thumb should next add an attribute for the default
3085 mode, false if it should next add an attribute for the opposite mode. */
3086 static GTY(()) bool thumb_flipper;
3087
3088 /* Options after initial target override. */
3089 static GTY(()) tree init_optimize;
3090
3091 static void
3092 arm_override_options_after_change_1 (struct gcc_options *opts,
3093 struct gcc_options *opts_set)
3094 {
3095 /* -falign-functions without argument: supply one. */
3096 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3097 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3098 && opts->x_optimize_size ? "2" : "4";
3099 }
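/* For example, "-Os -mthumb -falign-functions" (with no explicit alignment
   value) ends up as -falign-functions=2; otherwise "4" is supplied.  */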
3100
3101 /* Implement targetm.override_options_after_change. */
3102
3103 static void
3104 arm_override_options_after_change (void)
3105 {
3106 arm_override_options_after_change_1 (&global_options, &global_options_set);
3107 }
3108
3109 /* Implement TARGET_OPTION_RESTORE. */
3110 static void
3111 arm_option_restore (struct gcc_options */* opts */,
3112 struct gcc_options */* opts_set */,
3113 struct cl_target_option *ptr)
3114 {
3115 arm_configure_build_target (&arm_active_target, ptr, false);
3116 arm_option_reconfigure_globals ();
3117 }
3118
3119 /* Reset options between modes that the user has specified. */
3120 static void
3121 arm_option_override_internal (struct gcc_options *opts,
3122 struct gcc_options *opts_set)
3123 {
3124 arm_override_options_after_change_1 (opts, opts_set);
3125
3126 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3127 {
3128 /* The default is to enable interworking, so this warning message would
3129 be confusing to users who have just compiled with
3130 	 e.g. -march=armv4.  */
3131 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3132 opts->x_target_flags &= ~MASK_INTERWORK;
3133 }
3134
3135 if (TARGET_THUMB_P (opts->x_target_flags)
3136 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3137 {
3138 warning (0, "target CPU does not support THUMB instructions");
3139 opts->x_target_flags &= ~MASK_THUMB;
3140 }
3141
3142 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3143 {
3144 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3145 opts->x_target_flags &= ~MASK_APCS_FRAME;
3146 }
3147
3148 /* Callee super interworking implies thumb interworking. Adding
3149 this to the flags here simplifies the logic elsewhere. */
3150 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3151 opts->x_target_flags |= MASK_INTERWORK;
3152
3153   /* We need to remember the initial values so that combinations of options
3154      like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3155 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3156
3157 if (! opts_set->x_arm_restrict_it)
3158 opts->x_arm_restrict_it = arm_arch8;
3159
3160 /* ARM execution state and M profile don't have [restrict] IT. */
3161 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3162 opts->x_arm_restrict_it = 0;
3163
3164 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3165 if (!opts_set->x_arm_restrict_it
3166 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3167 opts->x_arm_restrict_it = 0;
3168
3169 /* Enable -munaligned-access by default for
3170 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3171 i.e. Thumb2 and ARM state only.
3172 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3173      - ARMv8 architecture-based processors.
3174
3175 Disable -munaligned-access by default for
3176 - all pre-ARMv6 architecture-based processors
3177 - ARMv6-M architecture-based processors
3178 - ARMv8-M Baseline processors. */
3179
3180 if (! opts_set->x_unaligned_access)
3181 {
3182 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3183 && arm_arch6 && (arm_arch_notm || arm_arch7));
3184 }
3185 else if (opts->x_unaligned_access == 1
3186 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3187 {
3188 warning (0, "target CPU does not support unaligned accesses");
3189 opts->x_unaligned_access = 0;
3190 }
3191
3192   /* Don't warn about disabling this: -fschedule-insns is on by default at -O2.  */
3193 if (TARGET_THUMB1_P (opts->x_target_flags))
3194 opts->x_flag_schedule_insns = 0;
3195 else
3196 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3197
3198 /* Disable shrink-wrap when optimizing function for size, since it tends to
3199 generate additional returns. */
3200 if (optimize_function_for_size_p (cfun)
3201 && TARGET_THUMB2_P (opts->x_target_flags))
3202 opts->x_flag_shrink_wrap = false;
3203 else
3204 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3205
3206 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3207 - epilogue_insns - does not accurately model the corresponding insns
3208 emitted in the asm file. In particular, see the comment in thumb_exit
3209 'Find out how many of the (return) argument registers we can corrupt'.
3210 As a consequence, the epilogue may clobber registers without fipa-ra
3211 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3212 TODO: Accurately model clobbers for epilogue_insns and reenable
3213 fipa-ra. */
3214 if (TARGET_THUMB1_P (opts->x_target_flags))
3215 opts->x_flag_ipa_ra = 0;
3216 else
3217 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3218
3219 /* Thumb2 inline assembly code should always use unified syntax.
3220 This will apply to ARM and Thumb1 eventually. */
3221 if (TARGET_THUMB2_P (opts->x_target_flags))
3222 opts->x_inline_asm_unified = true;
3223
3224 if (arm_stack_protector_guard == SSP_GLOBAL
3225 && opts->x_arm_stack_protector_guard_offset_str)
3226 {
3227 error ("incompatible options %<-mstack-protector-guard=global%> and "
3228 "%<-mstack-protector-guard-offset=%s%>",
3229 arm_stack_protector_guard_offset_str);
3230 }
3231
3232 if (opts->x_arm_stack_protector_guard_offset_str)
3233 {
3234 char *end;
3235 const char *str = arm_stack_protector_guard_offset_str;
3236 errno = 0;
3237 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3238 if (!*str || *end || errno)
3239 error ("%qs is not a valid offset in %qs", str,
3240 "-mstack-protector-guard-offset=");
3241 arm_stack_protector_guard_offset = offs;
3242 }
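/* Illustrative sketch of the option combinations handled above (the
   offset value is an arbitrary example, not a recommendation):

     -mstack-protector-guard=tls -mstack-protector-guard-offset=0x28
       -> the canary is loaded from the TLS register at offset 0x28.
     -mstack-protector-guard=global -mstack-protector-guard-offset=8
       -> rejected: an offset only makes sense with the TLS guard.  */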
3243
3244 if (arm_current_function_pac_enabled_p ())
3245 {
3246 if (!arm_arch8m_main)
3247 error ("This architecture does not support branch protection "
3248 "instructions");
3249 if (TARGET_TPCS_FRAME)
3250 sorry ("return address signing is not supported with %<-mtpcs-frame%>");
3251 }
3252
3253 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3254 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3255 #endif
3256 }
3257
3258 static sbitmap isa_all_fpubits_internal;
3259 static sbitmap isa_all_fpbits;
3260 static sbitmap isa_quirkbits;
3261
3262 /* Configure a build target TARGET from the user-specified options OPTS and
3263 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3264 architecture have been specified, but the two are not identical. */
3265 void
3266 arm_configure_build_target (struct arm_build_target *target,
3267 struct cl_target_option *opts,
3268 bool warn_compatible)
3269 {
3270 const cpu_option *arm_selected_tune = NULL;
3271 const arch_option *arm_selected_arch = NULL;
3272 const cpu_option *arm_selected_cpu = NULL;
3273 const arm_fpu_desc *arm_selected_fpu = NULL;
3274 const char *tune_opts = NULL;
3275 const char *arch_opts = NULL;
3276 const char *cpu_opts = NULL;
3277
3278 bitmap_clear (target->isa);
3279 target->core_name = NULL;
3280 target->arch_name = NULL;
3281
3282 if (opts->x_arm_arch_string)
3283 {
3284 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3285 "-march",
3286 opts->x_arm_arch_string);
3287 arch_opts = strchr (opts->x_arm_arch_string, '+');
3288 }
3289
3290 if (opts->x_arm_cpu_string)
3291 {
3292 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3293 opts->x_arm_cpu_string);
3294 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3295 arm_selected_tune = arm_selected_cpu;
3296 /* If taking the tuning from -mcpu, we don't need to rescan the
3297 options for tuning. */
3298 }
3299
3300 if (opts->x_arm_tune_string)
3301 {
3302 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3303 opts->x_arm_tune_string);
3304 tune_opts = strchr (opts->x_arm_tune_string, '+');
3305 }
3306
3307 if (opts->x_arm_branch_protection_string)
3308 {
3309 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string,
3310 "-mbranch-protection=");
3311
3312 if (aarch_ra_sign_key != AARCH_KEY_A)
3313 {
3314 warning (0, "invalid key type for %<-mbranch-protection=%>");
3315 aarch_ra_sign_key = AARCH_KEY_A;
3316 }
3317 }
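/* Hedged examples of how the validation above plays out (spellings
   follow the -mbranch-protection= documentation):

     -mbranch-protection=pac-ret	-> accepted, A-key signing.
     -mbranch-protection=pac-ret+leaf	-> accepted, leaf functions are
					   signed as well.
     -mbranch-protection=pac-ret+b-key	-> the key is forced back to the
					   A key with a warning, since
					   only the A key is supported
					   here.  */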
3318
3319 if (arm_selected_arch)
3320 {
3321 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3322 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3323 arch_opts);
3324
3325 if (arm_selected_cpu)
3326 {
3327 auto_sbitmap cpu_isa (isa_num_bits);
3328 auto_sbitmap isa_delta (isa_num_bits);
3329
3330 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3331 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3332 cpu_opts);
3333 bitmap_xor (isa_delta, cpu_isa, target->isa);
3334 /* Ignore any bits that are quirk bits. */
3335 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3336 /* If the user (or the default configuration) has specified a
3337 specific FPU, then ignore any bits that depend on the FPU
3338 configuration. Do similarly if using the soft-float
3339 ABI. */
3340 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3341 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3342 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3343
3344 if (!bitmap_empty_p (isa_delta))
3345 {
3346 if (warn_compatible)
3347 warning (0, "switch %<-mcpu=%s%> conflicts "
3348 "with switch %<-march=%s%>",
3349 opts->x_arm_cpu_string,
3350 opts->x_arm_arch_string);
3351
3352 /* -march wins for code generation.
3353 -mcpu wins for default tuning. */
3354 if (!arm_selected_tune)
3355 arm_selected_tune = arm_selected_cpu;
3356
3357 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3358 target->arch_name = arm_selected_arch->common.name;
3359 }
3360 else
3361 {
3362 /* Architecture and CPU are essentially the same.
3363 Prefer the CPU setting. */
3364 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3365 target->core_name = arm_selected_cpu->common.name;
3366 /* Copy the CPU's capabilities, so that we inherit the
3367 appropriate extensions and quirks. */
3368 bitmap_copy (target->isa, cpu_isa);
3369 }
3370 }
3371 else
3372 {
3373 /* Pick a CPU based on the architecture. */
3374 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3375 target->arch_name = arm_selected_arch->common.name;
3376 /* Note: target->core_name is left unset in this path. */
3377 }
3378 }
3379 else if (arm_selected_cpu)
3380 {
3381 target->core_name = arm_selected_cpu->common.name;
3382 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3383 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3384 cpu_opts);
3385 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3386 }
3387 /* If the user did not specify a processor or architecture, choose
3388 one for them. */
3389 else
3390 {
3391 const cpu_option *sel;
3392 auto_sbitmap sought_isa (isa_num_bits);
3393 bitmap_clear (sought_isa);
3394 auto_sbitmap default_isa (isa_num_bits);
3395
3396 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3397 TARGET_CPU_DEFAULT);
3398 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3399 gcc_assert (arm_selected_cpu->common.name);
3400
3401 /* RWE: All of the selection logic below (to the end of this
3402 'if' clause) looks somewhat suspect. It appears to be mostly
3403 there to support forcing thumb support when the default CPU
3404 does not have thumb (somewhat dubious in terms of what the
3405 user might be expecting). I think it should be removed once
3406 support for the pre-thumb era cores is removed. */
3407 sel = arm_selected_cpu;
3408 arm_initialize_isa (default_isa, sel->common.isa_bits);
3409 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3410 cpu_opts);
3411
3412 /* Now check to see if the user has specified any command line
3413 switches that require certain abilities from the cpu. */
3414
3415 if (TARGET_INTERWORK || TARGET_THUMB)
3416 bitmap_set_bit (sought_isa, isa_bit_thumb);
3417
3418 /* If there are such requirements and the default CPU does not
3419 satisfy them, we need to run over the complete list of
3420 cores looking for one that is satisfactory. */
3421 if (!bitmap_empty_p (sought_isa)
3422 && !bitmap_subset_p (sought_isa, default_isa))
3423 {
3424 auto_sbitmap candidate_isa (isa_num_bits);
3425 /* We're only interested in a CPU with at least the
3426 capabilities of the default CPU and the required
3427 additional features. */
3428 bitmap_ior (default_isa, default_isa, sought_isa);
3429
3430 /* Try to locate a CPU type that supports all of the abilities
3431 of the default CPU, plus the extra abilities requested by
3432 the user. */
3433 for (sel = all_cores; sel->common.name != NULL; sel++)
3434 {
3435 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3436 /* An exact match? */
3437 if (bitmap_equal_p (default_isa, candidate_isa))
3438 break;
3439 }
3440
3441 if (sel->common.name == NULL)
3442 {
3443 unsigned current_bit_count = isa_num_bits;
3444 const cpu_option *best_fit = NULL;
3445
3446 /* Ideally we would like to issue an error message here
3447 saying that it was not possible to find a CPU compatible
3448 with the default CPU, but which also supports the command
3449 line options specified by the programmer, and so they
3450 ought to use the -mcpu=<name> command line option to
3451 override the default CPU type.
3452
3453 If we cannot find a CPU that has exactly the
3454 characteristics of the default CPU and the given
3455 command line options we scan the array again looking
3456 for a best match. The best match must have at least
3457 the capabilities of the perfect match. */
3458 for (sel = all_cores; sel->common.name != NULL; sel++)
3459 {
3460 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3461
3462 if (bitmap_subset_p (default_isa, candidate_isa))
3463 {
3464 unsigned count;
3465
3466 bitmap_and_compl (candidate_isa, candidate_isa,
3467 default_isa);
3468 count = bitmap_popcount (candidate_isa);
3469
3470 if (count < current_bit_count)
3471 {
3472 best_fit = sel;
3473 current_bit_count = count;
3474 }
3475 }
3476
3477 gcc_assert (best_fit);
3478 sel = best_fit;
3479 }
3480 }
3481 arm_selected_cpu = sel;
3482 }
3483
3484 /* Now we know the CPU, we can finally initialize the target
3485 structure. */
3486 target->core_name = arm_selected_cpu->common.name;
3487 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3488 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3489 cpu_opts);
3490 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3491 }
3492
3493 gcc_assert (arm_selected_cpu);
3494 gcc_assert (arm_selected_arch);
3495
3496 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3497 {
3498 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3499 auto_sbitmap fpu_bits (isa_num_bits);
3500
3501 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3502 /* This should clear out ALL bits relating to the FPU/simd
3503 extensions, to avoid potentially invalid combinations later on
3504 that we can't match. At present we only clear out those bits
3505 that can be set by -mfpu. This should be fixed in GCC-12. */
3506 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3507 bitmap_ior (target->isa, target->isa, fpu_bits);
3508 }
3509
3510 /* If we have the soft-float ABI, clear any feature bits relating to use of
3511 floating-point operations. They'll just confuse things later on. */
3512 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3513 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3514
3515 /* There may be implied bits which we still need to enable. These are
3516 non-named features which are needed to complete other sets of features,
3517 but cannot be enabled from arm-cpus.in due to being shared between
3518 multiple fgroups. Each entry in all_implied_fbits is of the form
3519 ante -> cons, meaning that if the feature "ante" is enabled, we should
3520 implicitly enable "cons". */
3521 const struct fbit_implication *impl = all_implied_fbits;
3522 while (impl->ante)
3523 {
3524 if (bitmap_bit_p (target->isa, impl->ante))
3525 bitmap_set_bit (target->isa, impl->cons);
3526 impl++;
3527 }
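/* Worked example of the implication walk above (the feature pairing is
   purely illustrative): if all_implied_fbits contained the entry
   { isa_bit_mve_float -> isa_bit_fp16 }, then any target whose ISA set
   already has mve_float would silently gain fp16 as well, without the
   user having to spell it on the command line.  */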
3528
3529 if (!arm_selected_tune)
3530 arm_selected_tune = arm_selected_cpu;
3531 else /* Validate the features passed to -mtune. */
3532 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3533
3534 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3535
3536 /* Finish initializing the target structure. */
3537 if (!target->arch_name)
3538 target->arch_name = arm_selected_arch->common.name;
3539 target->arch_pp_name = arm_selected_arch->arch;
3540 target->base_arch = arm_selected_arch->base_arch;
3541 target->profile = arm_selected_arch->profile;
3542
3543 target->tune_flags = tune_data->tune_flags;
3544 target->tune = tune_data->tune;
3545 target->tune_core = tune_data->scheduler;
3546 }
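/* Hedged sketch of the conflict handling above:

     -mcpu=cortex-m4 -march=armv7-a
       -> the ISA deltas do not cancel, so with WARN_COMPATIBLE set we
	  emit "switch -mcpu=cortex-m4 conflicts with -march=armv7-a";
	  -march then wins for code generation while -mcpu still
	  provides the default tuning.  */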
3547
3548 /* Fix up any incompatible options that the user has specified. */
3549 static void
3550 arm_option_override (void)
3551 {
3552 static const enum isa_feature fpu_bitlist_internal[]
3553 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3554 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3555 static const enum isa_feature fp_bitlist[]
3556 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3557 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3558 cl_target_option opts;
3559
3560 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3561 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3562
3563 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3564 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3565 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3566 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3567
3568 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3569
3570 if (!OPTION_SET_P (arm_fpu_index))
3571 {
3572 bool ok;
3573 int fpu_index;
3574
3575 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3576 CL_TARGET);
3577 gcc_assert (ok);
3578 arm_fpu_index = (enum fpu_type) fpu_index;
3579 }
3580
3581 cl_target_option_save (&opts, &global_options, &global_options_set);
3582 arm_configure_build_target (&arm_active_target, &opts, true);
3583
3584 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3585 SUBTARGET_OVERRIDE_OPTIONS;
3586 #endif
3587
3588 /* Initialize boolean versions of the architectural flags, for use
3589 in the arm.md file and for enabling feature flags. */
3590 arm_option_reconfigure_globals ();
3591
3592 arm_tune = arm_active_target.tune_core;
3593 tune_flags = arm_active_target.tune_flags;
3594 current_tune = arm_active_target.tune;
3595
3596 /* TBD: DWARF info for the APCS frame is not handled yet. */
3597 if (TARGET_APCS_FRAME)
3598 flag_shrink_wrap = false;
3599
3600 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3601 {
3602 warning (0, "%<-mapcs-stack-check%> incompatible with "
3603 "%<-mno-apcs-frame%>");
3604 target_flags |= MASK_APCS_FRAME;
3605 }
3606
3607 if (TARGET_POKE_FUNCTION_NAME)
3608 target_flags |= MASK_APCS_FRAME;
3609
3610 if (TARGET_APCS_REENT && flag_pic)
3611 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3612
3613 if (TARGET_APCS_REENT)
3614 warning (0, "APCS reentrant code not supported. Ignored");
3615
3616 /* Set up some tuning parameters. */
3617 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3618 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3619 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3620 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3621 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3622 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3623
3624 /* For arm2/3 there is no need to do any scheduling if we are doing
3625 software floating-point. */
3626 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3627 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3628
3629 /* Override the default structure alignment for AAPCS ABI. */
3630 if (!OPTION_SET_P (arm_structure_size_boundary))
3631 {
3632 if (TARGET_AAPCS_BASED)
3633 arm_structure_size_boundary = 8;
3634 }
3635 else
3636 {
3637 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3638
3639 if (arm_structure_size_boundary != 8
3640 && arm_structure_size_boundary != 32
3641 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3642 {
3643 if (ARM_DOUBLEWORD_ALIGN)
3644 warning (0,
3645 "structure size boundary can only be set to 8, 32 or 64");
3646 else
3647 warning (0, "structure size boundary can only be set to 8 or 32");
3648 arm_structure_size_boundary
3649 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3650 }
3651 }
3652
3653 if (TARGET_VXWORKS_RTP)
3654 {
3655 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3656 arm_pic_data_is_text_relative = 0;
3657 }
3658 else if (flag_pic
3659 && !arm_pic_data_is_text_relative
3660 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3661 /* When text & data segments don't have a fixed displacement, the
3662 intended use is with a single, read-only PIC base register.
3663 Unless the user explicitly requested not to do that, set
3664 it. */
3665 target_flags |= MASK_SINGLE_PIC_BASE;
3666
3667 /* If stack checking is disabled, we can use r10 as the PIC register,
3668 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3669 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3670 {
3671 if (TARGET_VXWORKS_RTP)
3672 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3673 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3674 }
3675
3676 if (flag_pic && TARGET_VXWORKS_RTP)
3677 arm_pic_register = 9;
3678
3679 /* If in FDPIC mode then force arm_pic_register to be r9. */
3680 if (TARGET_FDPIC)
3681 {
3682 arm_pic_register = FDPIC_REGNUM;
3683 if (TARGET_THUMB1)
3684 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3685 }
3686
3687 if (arm_pic_register_string != NULL)
3688 {
3689 int pic_register = decode_reg_name (arm_pic_register_string);
3690
3691 if (!flag_pic)
3692 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3693
3694 /* Prevent the user from choosing an obviously stupid PIC register. */
3695 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3696 || pic_register == HARD_FRAME_POINTER_REGNUM
3697 || pic_register == STACK_POINTER_REGNUM
3698 || pic_register >= PC_REGNUM
3699 || (TARGET_VXWORKS_RTP
3700 && (unsigned int) pic_register != arm_pic_register))
3701 error ("unable to use %qs for PIC register", arm_pic_register_string);
3702 else
3703 arm_pic_register = pic_register;
3704 }
3705
3706 if (flag_pic)
3707 target_word_relocations = 1;
3708
3709 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3710 if (fix_cm3_ldrd == 2)
3711 {
3712 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3713 fix_cm3_ldrd = 1;
3714 else
3715 fix_cm3_ldrd = 0;
3716 }
3717
3718 /* Enable fix_vlldm by default if required. */
3719 if (fix_vlldm == 2)
3720 {
3721 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3722 fix_vlldm = 1;
3723 else
3724 fix_vlldm = 0;
3725 }
3726
3727 /* Enable fix_aes by default if required. */
3728 if (fix_aes_erratum_1742098 == 2)
3729 {
3730 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3731 fix_aes_erratum_1742098 = 1;
3732 else
3733 fix_aes_erratum_1742098 = 0;
3734 }
3735
3736 /* Hot/Cold partitioning is not currently supported, since we can't
3737 handle literal pool placement in that case. */
3738 if (flag_reorder_blocks_and_partition)
3739 {
3740 inform (input_location,
3741 "%<-freorder-blocks-and-partition%> not supported "
3742 "on this architecture");
3743 flag_reorder_blocks_and_partition = 0;
3744 flag_reorder_blocks = 1;
3745 }
3746
3747 if (flag_pic)
3748 /* Hoisting PIC address calculations more aggressively provides a small,
3749 but measurable, size reduction for PIC code. Therefore, we decrease
3750 the bar for unrestricted expression hoisting to the cost of PIC address
3751 calculation, which is 2 instructions. */
3752 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3753 param_gcse_unrestricted_cost, 2);
3754
3755 /* ARM EABI defaults to strict volatile bitfields. */
3756 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3757 && abi_version_at_least(2))
3758 flag_strict_volatile_bitfields = 1;
3759
3760 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3761 which we have deemed it beneficial (signified by setting
3762 prefetch.num_slots to 1 or more). */
3763 if (flag_prefetch_loop_arrays < 0
3764 && HAVE_prefetch
3765 && optimize >= 3
3766 && current_tune->prefetch.num_slots > 0)
3767 flag_prefetch_loop_arrays = 1;
3768
3769 /* Set up parameters to be used in prefetching algorithm. Do not
3770 override the defaults unless we are tuning for a core we have
3771 researched values for. */
3772 if (current_tune->prefetch.num_slots > 0)
3773 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3774 param_simultaneous_prefetches,
3775 current_tune->prefetch.num_slots);
3776 if (current_tune->prefetch.l1_cache_line_size >= 0)
3777 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3778 param_l1_cache_line_size,
3779 current_tune->prefetch.l1_cache_line_size);
3780 if (current_tune->prefetch.l1_cache_line_size >= 0)
3781 {
3782 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3783 param_destruct_interfere_size,
3784 current_tune->prefetch.l1_cache_line_size);
3785 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3786 param_construct_interfere_size,
3787 current_tune->prefetch.l1_cache_line_size);
3788 }
3789 else
3790 {
3791 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3792 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3793 constructive? */
3794 /* More recent Cortex chips have a 64-byte cache line, but are marked
3795 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3796 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3797 param_destruct_interfere_size, 64);
3798 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3799 param_construct_interfere_size, 64);
3800 }
3801
3802 if (current_tune->prefetch.l1_cache_size >= 0)
3803 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3804 param_l1_cache_size,
3805 current_tune->prefetch.l1_cache_size);
3806
3807 /* Look through ready list and all of queue for instructions
3808 relevant for L2 auto-prefetcher. */
3809 int sched_autopref_queue_depth;
3810
3811 switch (current_tune->sched_autopref)
3812 {
3813 case tune_params::SCHED_AUTOPREF_OFF:
3814 sched_autopref_queue_depth = -1;
3815 break;
3816
3817 case tune_params::SCHED_AUTOPREF_RANK:
3818 sched_autopref_queue_depth = 0;
3819 break;
3820
3821 case tune_params::SCHED_AUTOPREF_FULL:
3822 sched_autopref_queue_depth = max_insn_queue_index + 1;
3823 break;
3824
3825 default:
3826 gcc_unreachable ();
3827 }
3828
3829 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3830 param_sched_autopref_queue_depth,
3831 sched_autopref_queue_depth);
3832
3833 /* Currently, for slow flash data, we just disable literal pools. We also
3834 disable them for pure-code. */
3835 if (target_slow_flash_data || target_pure_code)
3836 arm_disable_literal_pool = true;
3837
3838 /* Disable scheduling fusion by default if the target is not an armv7
3839 processor or does not prefer ldrd/strd. */
3840 if (flag_schedule_fusion == 2
3841 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3842 flag_schedule_fusion = 0;
3843
3844 /* Need to remember initial options before they are overridden. */
3845 init_optimize = build_optimization_node (&global_options,
3846 &global_options_set);
3847
3848 arm_options_perform_arch_sanity_checks ();
3849 arm_option_override_internal (&global_options, &global_options_set);
3850 arm_option_check_internal (&global_options);
3851 arm_option_params_internal ();
3852
3853 /* Create the default target_options structure. */
3854 target_option_default_node = target_option_current_node
3855 = build_target_option_node (&global_options, &global_options_set);
3856
3857 /* Register global variables with the garbage collector. */
3858 arm_add_gc_roots ();
3859
3860 /* Init initial mode for testing. */
3861 thumb_flipper = TARGET_THUMB;
3862 }
3863
3864
3865 /* Reconfigure global status flags from the active_target.isa. */
3866 void
3867 arm_option_reconfigure_globals (void)
3868 {
3869 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3870 arm_base_arch = arm_active_target.base_arch;
3871
3872 /* Initialize boolean versions of the architectural flags, for use
3873 in the arm.md file. */
3874 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3875 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3876 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3877 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3878 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3879 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3880 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3881 arm_arch6m = arm_arch6 && !arm_arch_notm;
3882 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3883 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3884 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3885 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3886 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3887 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3888 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3889 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3890 isa_bit_armv8_1m_main);
3891 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3892 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3893 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3894 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3895 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3896 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3897 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3898 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3899 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3900 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3901 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3902 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3903 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3904
3905 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3906 if (arm_fp16_inst)
3907 {
3908 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3909 error ("selected fp16 options are incompatible");
3910 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3911 }
3912
3913 arm_arch_cde = 0;
3914 arm_arch_cde_coproc = 0;
3915 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3916 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3917 isa_bit_cdecp6, isa_bit_cdecp7};
3918 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3919 {
3920 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3921 if (cde_bit)
3922 {
3923 arm_arch_cde |= cde_bit;
3924 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3925 }
3926 }
3927
3928 /* And finally, set up some quirks. */
3929 arm_arch_no_volatile_ce
3930 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3931 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3932 isa_bit_quirk_armv6kz);
3933
3934 /* Use the cp15 method if it is available. */
3935 if (target_thread_pointer == TP_AUTO)
3936 {
3937 if (arm_arch6k && !TARGET_THUMB1)
3938 target_thread_pointer = TP_TPIDRURO;
3939 else
3940 target_thread_pointer = TP_SOFT;
3941 }
3942
3943 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3944 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3945 }
3946
3947 /* Perform some validation between the desired architecture and the rest of the
3948 options. */
3949 void
3950 arm_options_perform_arch_sanity_checks (void)
3951 {
3952 /* V5T code we generate is completely interworking capable, so we turn off
3953 TARGET_INTERWORK here to avoid many tests later on. */
3954
3955 /* XXX However, we must pass the right pre-processor defines to CPP
3956 or GLD can get confused. This is a hack. */
3957 if (TARGET_INTERWORK)
3958 arm_cpp_interwork = 1;
3959
3960 if (arm_arch5t)
3961 target_flags &= ~MASK_INTERWORK;
3962
3963 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3964 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3965
3966 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3967 error ("iwmmxt abi requires an iwmmxt capable cpu");
3968
3969 /* BPABI targets use linker tricks to allow interworking on cores
3970 without thumb support. */
3971 if (TARGET_INTERWORK
3972 && !TARGET_BPABI
3973 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3974 {
3975 warning (0, "target CPU does not support interworking" );
3976 target_flags &= ~MASK_INTERWORK;
3977 }
3978
3979 /* If soft-float is specified then don't use FPU. */
3980 if (TARGET_SOFT_FLOAT)
3981 arm_fpu_attr = FPU_NONE;
3982 else
3983 arm_fpu_attr = FPU_VFP;
3984
3985 if (TARGET_AAPCS_BASED)
3986 {
3987 if (TARGET_CALLER_INTERWORKING)
3988 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3989 else
3990 if (TARGET_CALLEE_INTERWORKING)
3991 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3992 }
3993
3994 /* __fp16 support currently assumes the core has ldrh. */
3995 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3996 sorry ("%<__fp16%> and no ldrh");
3997
3998 if (use_cmse && !arm_arch_cmse)
3999 error ("target CPU does not support ARMv8-M Security Extensions");
4000
4001 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
4002 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
4003 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4004 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4005
4006
4007 if (TARGET_AAPCS_BASED)
4008 {
4009 if (arm_abi == ARM_ABI_IWMMXT)
4010 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4011 else if (TARGET_HARD_FLOAT_ABI)
4012 {
4013 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4014 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4015 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4016 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4017 }
4018 else
4019 arm_pcs_default = ARM_PCS_AAPCS;
4020 }
4021 else
4022 {
4023 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4024 sorry ("%<-mfloat-abi=hard%> and VFP");
4025
4026 if (arm_abi == ARM_ABI_APCS)
4027 arm_pcs_default = ARM_PCS_APCS;
4028 else
4029 arm_pcs_default = ARM_PCS_ATPCS;
4030 }
4031 }
4032
4033 /* Test whether a local function descriptor is canonical, i.e.,
4034 whether we can use GOTOFFFUNCDESC to compute the address of the
4035 function. */
4036 static bool
4037 arm_fdpic_local_funcdesc_p (rtx fnx)
4038 {
4039 tree fn;
4040 enum symbol_visibility vis;
4041 bool ret;
4042
4043 if (!TARGET_FDPIC)
4044 return true;
4045
4046 if (! SYMBOL_REF_LOCAL_P (fnx))
4047 return false;
4048
4049 fn = SYMBOL_REF_DECL (fnx);
4050
4051 if (! fn)
4052 return false;
4053
4054 vis = DECL_VISIBILITY (fn);
4055
4056 if (vis == VISIBILITY_PROTECTED)
4057 /* Private function descriptors for protected functions are not
4058 canonical. Temporarily change the visibility to global so that
4059 we can ensure uniqueness of funcdesc pointers. */
4060 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4061
4062 ret = default_binds_local_p_1 (fn, flag_pic);
4063
4064 DECL_VISIBILITY (fn) = vis;
4065
4066 return ret;
4067 }
4068
4069 static void
4070 arm_add_gc_roots (void)
4071 {
4072 gcc_obstack_init(&minipool_obstack);
4073 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4074 }
4075 \f
4076 /* A table of known ARM exception types.
4077 For use with the interrupt function attribute. */
4078
4079 typedef struct
4080 {
4081 const char *const arg;
4082 const unsigned long return_value;
4083 }
4084 isr_attribute_arg;
4085
4086 static const isr_attribute_arg isr_attribute_args [] =
4087 {
4088 { "IRQ", ARM_FT_ISR },
4089 { "irq", ARM_FT_ISR },
4090 { "FIQ", ARM_FT_FIQ },
4091 { "fiq", ARM_FT_FIQ },
4092 { "ABORT", ARM_FT_ISR },
4093 { "abort", ARM_FT_ISR },
4094 { "UNDEF", ARM_FT_EXCEPTION },
4095 { "undef", ARM_FT_EXCEPTION },
4096 { "SWI", ARM_FT_EXCEPTION },
4097 { "swi", ARM_FT_EXCEPTION },
4098 { NULL, ARM_FT_NORMAL }
4099 };
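/* A short usage sketch for the table above (function names are made
   up): the strings are matched against the argument of the isr or
   interrupt attribute, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fault_handler (void) __attribute__ ((isr ("UNDEF")));

   An unrecognised string falls through to ARM_FT_UNKNOWN in
   arm_isr_value below.  */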
4100
4101 /* Returns the (interrupt) function type of the current
4102 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4103
4104 static unsigned long
4105 arm_isr_value (tree argument)
4106 {
4107 const isr_attribute_arg * ptr;
4108 const char * arg;
4109
4110 if (!arm_arch_notm)
4111 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4112
4113 /* No argument - default to IRQ. */
4114 if (argument == NULL_TREE)
4115 return ARM_FT_ISR;
4116
4117 /* Get the value of the argument. */
4118 if (TREE_VALUE (argument) == NULL_TREE
4119 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4120 return ARM_FT_UNKNOWN;
4121
4122 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4123
4124 /* Check it against the list of known arguments. */
4125 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4126 if (streq (arg, ptr->arg))
4127 return ptr->return_value;
4128
4129 /* An unrecognized interrupt type. */
4130 return ARM_FT_UNKNOWN;
4131 }
4132
4133 /* Computes the type of the current function. */
4134
4135 static unsigned long
4136 arm_compute_func_type (void)
4137 {
4138 unsigned long type = ARM_FT_UNKNOWN;
4139 tree a;
4140 tree attr;
4141
4142 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4143
4144 /* Decide if the current function is volatile. Such functions
4145 never return, and many memory cycles can be saved by not storing
4146 register values that will never be needed again. This optimization
4147 was added to speed up context switching in a kernel application. */
4148 if (optimize > 0
4149 && (TREE_NOTHROW (current_function_decl)
4150 || !(flag_unwind_tables
4151 || (flag_exceptions
4152 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4153 && TREE_THIS_VOLATILE (current_function_decl))
4154 type |= ARM_FT_VOLATILE;
4155
4156 if (cfun->static_chain_decl != NULL)
4157 type |= ARM_FT_NESTED;
4158
4159 attr = DECL_ATTRIBUTES (current_function_decl);
4160
4161 a = lookup_attribute ("naked", attr);
4162 if (a != NULL_TREE)
4163 type |= ARM_FT_NAKED;
4164
4165 a = lookup_attribute ("isr", attr);
4166 if (a == NULL_TREE)
4167 a = lookup_attribute ("interrupt", attr);
4168
4169 if (a == NULL_TREE)
4170 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4171 else
4172 type |= arm_isr_value (TREE_VALUE (a));
4173
4174 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4175 type |= ARM_FT_CMSE_ENTRY;
4176
4177 return type;
4178 }
4179
4180 /* Returns the type of the current function. */
4181
4182 unsigned long
4183 arm_current_func_type (void)
4184 {
4185 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4186 cfun->machine->func_type = arm_compute_func_type ();
4187
4188 return cfun->machine->func_type;
4189 }
4190
4191 bool
4192 arm_allocate_stack_slots_for_args (void)
4193 {
4194 /* Naked functions should not allocate stack slots for arguments. */
4195 return !IS_NAKED (arm_current_func_type ());
4196 }
4197
4198 static bool
4199 arm_warn_func_return (tree decl)
4200 {
4201 /* Naked functions are implemented entirely in assembly, including the
4202 return sequence, so suppress warnings about this. */
4203 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4204 }
4205
4206 \f
4207 /* Output assembler code for a block containing the constant parts
4208 of a trampoline, leaving space for the variable parts.
4209
4210 On the ARM, (if r8 is the static chain regnum, and remembering that
4211 referencing pc adds an offset of 8) the trampoline looks like:
4212 ldr r8, [pc, #0]
4213 ldr pc, [pc]
4214 .word static chain value
4215 .word function's address
4216 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4217
4218 In FDPIC mode, the trampoline looks like:
4219 .word trampoline address
4220 .word trampoline GOT address
4221 ldr r12, [pc, #8] ; #4 for Arm mode
4222 ldr r9, [pc, #8] ; #4 for Arm mode
4223 ldr pc, [pc, #8] ; #4 for Arm mode
4224 .word static chain value
4225 .word GOT address
4226 .word function's address
4227 */
4228
4229 static void
4230 arm_asm_trampoline_template (FILE *f)
4231 {
4232 fprintf (f, "\t.syntax unified\n");
4233
4234 if (TARGET_FDPIC)
4235 {
4236 /* The first two words are a function descriptor pointing to the
4237 trampoline code just below. */
4238 if (TARGET_ARM)
4239 fprintf (f, "\t.arm\n");
4240 else if (TARGET_THUMB2)
4241 fprintf (f, "\t.thumb\n");
4242 else
4243 /* Only ARM and Thumb-2 are supported. */
4244 gcc_unreachable ();
4245
4246 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4247 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4248 /* Trampoline code which sets the static chain register but also
4249 PIC register before jumping into real code. */
4250 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4251 STATIC_CHAIN_REGNUM, PC_REGNUM,
4252 TARGET_THUMB2 ? 8 : 4);
4253 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4254 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4255 TARGET_THUMB2 ? 8 : 4);
4256 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4257 PC_REGNUM, PC_REGNUM,
4258 TARGET_THUMB2 ? 8 : 4);
4259 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4260 }
4261 else if (TARGET_ARM)
4262 {
4263 fprintf (f, "\t.arm\n");
4264 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4265 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4266 }
4267 else if (TARGET_THUMB2)
4268 {
4269 fprintf (f, "\t.thumb\n");
4270 /* The Thumb-2 trampoline is similar to the arm implementation.
4271 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4272 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4273 STATIC_CHAIN_REGNUM, PC_REGNUM);
4274 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4275 }
4276 else
4277 {
4278 ASM_OUTPUT_ALIGN (f, 2);
4279 fprintf (f, "\t.code\t16\n");
4280 fprintf (f, ".Ltrampoline_start:\n");
4281 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4282 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4283 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4284 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4285 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4286 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4287 }
4288 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4289 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4290 }
4291
4292 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4293
4294 static void
4295 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4296 {
4297 rtx fnaddr, mem, a_tramp;
4298
4299 emit_block_move (m_tramp, assemble_trampoline_template (),
4300 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4301
4302 if (TARGET_FDPIC)
4303 {
4304 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4305 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4306 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4307 /* The function start address is at offset 8, but in Thumb mode
4308 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4309 below. */
4310 rtx trampoline_code_start
4311 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4312
4313 /* Write initial funcdesc which points to the trampoline. */
4314 mem = adjust_address (m_tramp, SImode, 0);
4315 emit_move_insn (mem, trampoline_code_start);
4316 mem = adjust_address (m_tramp, SImode, 4);
4317 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4318 /* Setup static chain. */
4319 mem = adjust_address (m_tramp, SImode, 20);
4320 emit_move_insn (mem, chain_value);
4321 /* GOT + real function entry point. */
4322 mem = adjust_address (m_tramp, SImode, 24);
4323 emit_move_insn (mem, gotaddr);
4324 mem = adjust_address (m_tramp, SImode, 28);
4325 emit_move_insn (mem, fnaddr);
4326 }
4327 else
4328 {
4329 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4330 emit_move_insn (mem, chain_value);
4331
4332 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4333 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4334 emit_move_insn (mem, fnaddr);
4335 }
4336
4337 a_tramp = XEXP (m_tramp, 0);
4338 maybe_emit_call_builtin___clear_cache (a_tramp,
4339 plus_constant (ptr_mode,
4340 a_tramp,
4341 TRAMPOLINE_SIZE));
4342 }
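/* Hedged sketch of the non-FDPIC ARM trampoline after initialization,
   matching the offsets used above (with r8 standing in for the static
   chain register, as in the comment before the template):

     0:  ldr  r8, [pc, #0]	; loads the word at offset 8
     4:  ldr  pc, [pc]		; loads the word at offset 12
     8:  .word <chain_value>	; written at offset 8 above
     12: .word <fnaddr>		; written at offset 12 above

   The pc-relative loads line up because reading pc in ARM state yields
   the instruction address plus 8.  */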
4343
4344 /* Thumb trampolines should be entered in thumb mode, so set
4345 the bottom bit of the address. */
4346
4347 static rtx
4348 arm_trampoline_adjust_address (rtx addr)
4349 {
4350 /* For FDPIC don't fix trampoline address since it's a function
4351 descriptor and not a function address. */
4352 if (TARGET_THUMB && !TARGET_FDPIC)
4353 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4354 NULL, 0, OPTAB_LIB_WIDEN);
4355 return addr;
4356 }
4357 \f
4358 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4359 includes call-clobbered registers too. If this is a leaf function
4360 we can just examine the registers used by the RTL, but otherwise we
4361 have to assume that whatever function is called might clobber
4362 anything, and so we have to save all the call-clobbered registers
4363 as well. */
4364 static inline bool reg_needs_saving_p (unsigned reg)
4365 {
4366 unsigned long func_type = arm_current_func_type ();
4367
4368 if (IS_INTERRUPT (func_type))
4369 if (df_regs_ever_live_p (reg)
4370 /* Save call-clobbered core registers. */
4371 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4372 return true;
4373 else
4374 return false;
4375 else
4376 if (!df_regs_ever_live_p (reg)
4377 || call_used_or_fixed_reg_p (reg))
4378 return false;
4379 else
4380 return true;
4381 }
4382
4383 /* Return 1 if it is possible to return using a single instruction.
4384 If SIBLING is non-null, this is a test for a return before a sibling
4385 call. SIBLING is the call insn, so we can examine its register usage. */
4386
4387 int
4388 use_return_insn (int iscond, rtx sibling)
4389 {
4390 int regno;
4391 unsigned int func_type;
4392 unsigned long saved_int_regs;
4393 unsigned HOST_WIDE_INT stack_adjust;
4394 arm_stack_offsets *offsets;
4395
4396 /* Never use a return instruction before reload has run. */
4397 if (!reload_completed)
4398 return 0;
4399
4400 /* Never use a return instruction when return address signing
4401 mechanism is enabled as it requires more than one
4402 instruction. */
4403 if (arm_current_function_pac_enabled_p ())
4404 return 0;
4405
4406 func_type = arm_current_func_type ();
4407
4408 /* Naked, volatile and stack alignment functions need special
4409 consideration. */
4410 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4411 return 0;
4412
4413 /* So do interrupt functions that use the frame pointer and Thumb
4414 interrupt functions. */
4415 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4416 return 0;
4417
4418 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4419 && !optimize_function_for_size_p (cfun))
4420 return 0;
4421
4422 offsets = arm_get_frame_offsets ();
4423 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4424
4425 /* As do variadic functions. */
4426 if (crtl->args.pretend_args_size
4427 || cfun->machine->uses_anonymous_args
4428 /* Or if the function calls __builtin_eh_return () */
4429 || crtl->calls_eh_return
4430 /* Or if the function calls alloca */
4431 || cfun->calls_alloca
4432 /* Or if there is a stack adjustment. However, if the stack pointer
4433 is saved on the stack, we can use a pre-incrementing stack load. */
4434 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4435 && stack_adjust == 4))
4436 /* Or if the static chain register was saved above the frame, under the
4437 assumption that the stack pointer isn't saved on the stack. */
4438 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4439 && arm_compute_static_chain_stack_bytes() != 0))
4440 return 0;
4441
4442 saved_int_regs = offsets->saved_regs_mask;
4443
4444 /* Unfortunately, the insn
4445
4446 ldmib sp, {..., sp, ...}
4447
4448 triggers a bug on most SA-110 based devices, such that the stack
4449 pointer won't be correctly restored if the instruction takes a
4450 page fault. We work around this problem by popping r3 along with
4451 the other registers, since that is never slower than executing
4452 another instruction.
4453
4454 We test for !arm_arch5t here, because code for any architecture
4455 less than this could potentially be run on one of the buggy
4456 chips. */
4457 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4458 {
4459 /* Validate that r3 is a call-clobbered register (always true in
4460 the default abi) ... */
4461 if (!call_used_or_fixed_reg_p (3))
4462 return 0;
4463
4464 /* ... that it isn't being used for a return value ... */
4465 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4466 return 0;
4467
4468 /* ... or for a tail-call argument ... */
4469 if (sibling)
4470 {
4471 gcc_assert (CALL_P (sibling));
4472
4473 if (find_regno_fusage (sibling, USE, 3))
4474 return 0;
4475 }
4476
4477 /* ... and that there are no call-saved registers in r0-r2
4478 (always true in the default ABI). */
4479 if (saved_int_regs & 0x7)
4480 return 0;
4481 }
4482
4483 /* Can't be done if interworking with Thumb, and any registers have been
4484 stacked. */
4485 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4486 return 0;
4487
4488 /* On StrongARM, conditional returns are expensive if they aren't
4489 taken and multiple registers have been stacked. */
4490 if (iscond && arm_tune_strongarm)
4491 {
4492 /* Conditional return when just the LR is stored is a simple
4493 conditional-load instruction, that's not expensive. */
4494 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4495 return 0;
4496
4497 if (flag_pic
4498 && arm_pic_register != INVALID_REGNUM
4499 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4500 return 0;
4501 }
4502
4503 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4504 several instructions if anything needs to be popped. Armv8.1-M Mainline
4505 also needs several instructions to save and restore FP context. */
4506 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4507 return 0;
4508
4509 /* If there are saved registers but the LR isn't saved, then we need
4510 two instructions for the return. */
4511 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4512 return 0;
4513
4514 /* Can't be done if any of the VFP regs are pushed,
4515 since this also requires an insn. */
4516 if (TARGET_VFP_BASE)
4517 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4518 if (reg_needs_saving_p (regno))
4519 return 0;
4520
4521 if (TARGET_REALLY_IWMMXT)
4522 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4523 if (reg_needs_saving_p (regno))
4524 return 0;
4525
4526 return 1;
4527 }
4528
4529 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4530 shrink-wrapping if possible. This is the case if we need to emit a
4531 prologue, which we can test by looking at the offsets. */
4532 bool
4533 use_simple_return_p (void)
4534 {
4535 arm_stack_offsets *offsets;
4536
4537 /* Note this function can be called before or after reload. */
4538 if (!reload_completed)
4539 arm_compute_frame_layout ();
4540
4541 offsets = arm_get_frame_offsets ();
4542 return offsets->outgoing_args != 0;
4543 }
4544
4545 /* Return TRUE if int I is a valid immediate ARM constant. */
4546
4547 int
4548 const_ok_for_arm (HOST_WIDE_INT i)
4549 {
4550 int lowbit;
4551
4552 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4553 be all zero, or all one. */
4554 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4555 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4556 != ((~(unsigned HOST_WIDE_INT) 0)
4557 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4558 return FALSE;
4559
4560 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4561
4562 /* Fast return for 0 and small values. We must do this for zero, since
4563 the code below can't handle that one case. */
4564 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4565 return TRUE;
4566
4567 /* Get the number of trailing zeros. */
4568 lowbit = ffs((int) i) - 1;
4569
4570 /* Only even shifts are allowed in ARM mode so round down to the
4571 nearest even number. */
4572 if (TARGET_ARM)
4573 lowbit &= ~1;
4574
4575 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4576 return TRUE;
4577
4578 if (TARGET_ARM)
4579 {
4580 /* Allow rotated constants in ARM mode. */
4581 if (lowbit <= 4
4582 && ((i & ~0xc000003f) == 0
4583 || (i & ~0xf000000f) == 0
4584 || (i & ~0xfc000003) == 0))
4585 return TRUE;
4586 }
4587 else if (TARGET_THUMB2)
4588 {
4589 HOST_WIDE_INT v;
4590
4591 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4592 v = i & 0xff;
4593 v |= v << 16;
4594 if (i == v || i == (v | (v << 8)))
4595 return TRUE;
4596
4597 /* Allow repeated pattern 0xXY00XY00. */
4598 v = i & 0xff00;
4599 v |= v << 16;
4600 if (i == v)
4601 return TRUE;
4602 }
4603 else if (TARGET_HAVE_MOVT)
4604 {
4605 /* Thumb-1 Targets with MOVT. */
4606 if (i > 0xffff)
4607 return FALSE;
4608 else
4609 return TRUE;
4610 }
4611
4612 return FALSE;
4613 }
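/* Worked examples for const_ok_for_arm (checked by hand, so treat this
   as a sketch rather than a reference table):

     0x000000ff -> valid everywhere: a plain 8-bit immediate.
     0x00000104 -> valid: 0x41 shifted left by 2, i.e. an 8-bit value at
		   an even rotation.
     0x00ff00ff -> invalid in ARM mode (two separate byte fields), but
		   valid in Thumb-2 as the replicated pattern 0x00XY00XY.
     0x01010101 -> valid in Thumb-2 only, as the 0xXYXYXYXY pattern.
     0x00001234 -> valid for Thumb-1 targets with MOVT (<= 0xffff), but
		   not representable as a single ARM-mode immediate.  */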
4614
4615 /* Return true if I is a valid constant for the operation CODE. */
4616 int
4617 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4618 {
4619 if (const_ok_for_arm (i))
4620 return 1;
4621
4622 switch (code)
4623 {
4624 case SET:
4625 /* See if we can use movw. */
4626 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4627 return 1;
4628 else
4629 /* Otherwise, try mvn. */
4630 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4631
4632 case PLUS:
4633 /* See if we can use addw or subw. */
4634 if (TARGET_THUMB2
4635 && ((i & 0xfffff000) == 0
4636 || ((-i) & 0xfffff000) == 0))
4637 return 1;
4638 /* Fall through. */
4639 case COMPARE:
4640 case EQ:
4641 case NE:
4642 case GT:
4643 case LE:
4644 case LT:
4645 case GE:
4646 case GEU:
4647 case LTU:
4648 case GTU:
4649 case LEU:
4650 case UNORDERED:
4651 case ORDERED:
4652 case UNEQ:
4653 case UNGE:
4654 case UNLT:
4655 case UNGT:
4656 case UNLE:
4657 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4658
4659 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4660 case XOR:
4661 return 0;
4662
4663 case IOR:
4664 if (TARGET_THUMB2)
4665 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4666 return 0;
4667
4668 case AND:
4669 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4670
4671 default:
4672 gcc_unreachable ();
4673 }
4674 }
4675
4676 /* Return true if I is a valid DImode constant for the operation CODE. */
4677 int
4678 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4679 {
4680 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4681 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4682 rtx hi = GEN_INT (hi_val);
4683 rtx lo = GEN_INT (lo_val);
4684
4685 if (TARGET_THUMB1)
4686 return 0;
4687
4688 switch (code)
4689 {
4690 case AND:
4691 case IOR:
4692 case XOR:
4693 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4694 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4695 case PLUS:
4696 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4697
4698 default:
4699 return 0;
4700 }
4701 }
4702
4703 /* Emit a sequence of insns to handle a large constant.
4704 CODE is the code of the operation required, it can be any of SET, PLUS,
4705 IOR, AND, XOR, MINUS;
4706 MODE is the mode in which the operation is being performed;
4707 VAL is the integer to operate on;
4708 SOURCE is the other operand (a register, or a null-pointer for SET);
4709 SUBTARGETS means it is safe to create scratch registers if that will
4710 either produce a simpler sequence, or we will want to cse the values.
4711 Return value is the number of insns emitted. */
4712
4713 /* ??? Tweak this for thumb2. */
4714 int
4715 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4716 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4717 {
4718 rtx cond;
4719
4720 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4721 cond = COND_EXEC_TEST (PATTERN (insn));
4722 else
4723 cond = NULL_RTX;
4724
4725 if (subtargets || code == SET
4726 || (REG_P (target) && REG_P (source)
4727 && REGNO (target) != REGNO (source)))
4728 {
4729 /* After arm_reorg has been called, we can't fix up expensive
4730 constants by pushing them into memory so we must synthesize
4731 them in-line, regardless of the cost. This is only likely to
4732 be more costly on chips that have load delay slots and we are
4733 compiling without running the scheduler (so no splitting
4734 occurred before the final instruction emission).
4735
4736 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4737 */
4738 if (!cfun->machine->after_arm_reorg
4739 && !cond
4740 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4741 1, 0)
4742 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4743 + (code != SET))))
4744 {
4745 if (code == SET)
4746 {
4747 /* Currently SET is the only monadic value for CODE; all
4748 the rest are dyadic. */
4749 if (TARGET_USE_MOVT)
4750 arm_emit_movpair (target, GEN_INT (val));
4751 else
4752 emit_set_insn (target, GEN_INT (val));
4753
4754 return 1;
4755 }
4756 else
4757 {
4758 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4759
4760 if (TARGET_USE_MOVT)
4761 arm_emit_movpair (temp, GEN_INT (val));
4762 else
4763 emit_set_insn (temp, GEN_INT (val));
4764
4765 /* For MINUS, the value is subtracted from, since we never
4766 have subtraction of a constant. */
4767 if (code == MINUS)
4768 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4769 else
4770 emit_set_insn (target,
4771 gen_rtx_fmt_ee (code, mode, source, temp));
4772 return 2;
4773 }
4774 }
4775 }
4776
4777 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4778 1);
4779 }
4780
4781 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4782 ARM/THUMB2 immediates, and add up to VAL.
4783 The function return value gives the number of insns required. */
4784 static int
4785 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4786 struct four_ints *return_sequence)
4787 {
4788 int best_consecutive_zeros = 0;
4789 int i;
4790 int best_start = 0;
4791 int insns1, insns2;
4792 struct four_ints tmp_sequence;
4793
4794 /* If we aren't targeting ARM, the best place to start is always at
4795 the bottom, otherwise look more closely. */
4796 if (TARGET_ARM)
4797 {
4798 for (i = 0; i < 32; i += 2)
4799 {
4800 int consecutive_zeros = 0;
4801
4802 if (!(val & (3 << i)))
4803 {
4804 while ((i < 32) && !(val & (3 << i)))
4805 {
4806 consecutive_zeros += 2;
4807 i += 2;
4808 }
4809 if (consecutive_zeros > best_consecutive_zeros)
4810 {
4811 best_consecutive_zeros = consecutive_zeros;
4812 best_start = i - consecutive_zeros;
4813 }
4814 i -= 2;
4815 }
4816 }
4817 }
4818
4819 /* So long as it won't require any more insns to do so, it's
4820 desirable to emit a small constant (in bits 0...9) in the last
4821 insn. This way there is more chance that it can be combined with
4822 a later addressing insn to form a pre-indexed load or store
4823 operation. Consider:
4824
4825 *((volatile int *)0xe0000100) = 1;
4826 *((volatile int *)0xe0000110) = 2;
4827
4828 We want this to wind up as:
4829
4830 mov rA, #0xe0000000
4831 mov rB, #1
4832 str rB, [rA, #0x100]
4833 mov rB, #2
4834 str rB, [rA, #0x110]
4835
4836 rather than having to synthesize both large constants from scratch.
4837
4838 Therefore, we calculate how many insns would be required to emit
4839 the constant starting from `best_start', and also starting from
4840 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4841 yield a shorter sequence, we may as well use zero. */
4842 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4843 if (best_start != 0
4844 && ((HOST_WIDE_INT_1U << best_start) < val))
4845 {
4846 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4847 if (insns2 <= insns1)
4848 {
4849 *return_sequence = tmp_sequence;
4850 insns1 = insns2;
4851 }
4852 }
4853
4854 return insns1;
4855 }
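/* Worked example (illustrative) for the comment above: with
   VAL == 0xe0000100 the zero-start attempt finds the two-immediate split

	{ 0xe0000000, 0x00000100 }

   and, being no longer than the best_start attempt, is the sequence that
   gets returned; the small 0x100 part therefore ends up in the last insn,
   which is exactly what lets it fold into the str offset shown above.  */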
4856
4857 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4858 static int
4859 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4860 struct four_ints *return_sequence, int i)
4861 {
4862 int remainder = val & 0xffffffff;
4863 int insns = 0;
4864
4865 /* Try and find a way of doing the job in either two or three
4866 instructions.
4867
4868 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4869 location. We start at position I. This may be the MSB, or
4870 optimal_immediate_sequence may have positioned it at the largest block
4871 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4872 wrapping around to the top of the word when we drop off the bottom.
4873 In the worst case this code should produce no more than four insns.
4874
4875 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4876 constants, shifted to any arbitrary location. We should always start
4877 at the MSB. */
4878 do
4879 {
4880 int end;
4881 unsigned int b1, b2, b3, b4;
4882 unsigned HOST_WIDE_INT result;
4883 int loc;
4884
4885 gcc_assert (insns < 4);
4886
4887 if (i <= 0)
4888 i += 32;
4889
4890 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4891 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4892 {
4893 loc = i;
4894 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4895 /* We can use addw/subw for the last 12 bits. */
4896 result = remainder;
4897 else
4898 {
4899 /* Use an 8-bit shifted/rotated immediate. */
4900 end = i - 8;
4901 if (end < 0)
4902 end += 32;
4903 result = remainder & ((0x0ff << end)
4904 | ((i < end) ? (0xff >> (32 - end))
4905 : 0));
4906 i -= 8;
4907 }
4908 }
4909 else
4910 {
4911 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4912 arbitrary shifts. */
4913 i -= TARGET_ARM ? 2 : 1;
4914 continue;
4915 }
4916
4917 /* Next, see if we can do a better job with a thumb2 replicated
4918 constant.
4919
4920 We do it this way around to catch the cases like 0x01F001E0 where
4921 two 8-bit immediates would work, but a replicated constant would
4922 make it worse.
4923
4924 TODO: 16-bit constants that don't clear all the bits, but still win.
4925 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4926 if (TARGET_THUMB2)
4927 {
4928 b1 = (remainder & 0xff000000) >> 24;
4929 b2 = (remainder & 0x00ff0000) >> 16;
4930 b3 = (remainder & 0x0000ff00) >> 8;
4931 b4 = remainder & 0xff;
4932
4933 if (loc > 24)
4934 {
4935 /* The 8-bit immediate already found clears b1 (and maybe b2),
4936 but must leave b3 and b4 alone. */
4937
4938 /* First try to find a 32-bit replicated constant that clears
4939 almost everything. We can assume that we can't do it in one,
4940 or else we wouldn't be here. */
4941 unsigned int tmp = b1 & b2 & b3 & b4;
4942 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4943 + (tmp << 24);
4944 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4945 + (tmp == b3) + (tmp == b4);
4946 if (tmp
4947 && (matching_bytes >= 3
4948 || (matching_bytes == 2
4949 && const_ok_for_op (remainder & ~tmp2, code))))
4950 {
4951 /* At least 3 of the bytes match, and the fourth has at
4952 least as many bits set, or two of the bytes match
4953 and it will only require one more insn to finish. */
4954 result = tmp2;
4955 i = tmp != b1 ? 32
4956 : tmp != b2 ? 24
4957 : tmp != b3 ? 16
4958 : 8;
4959 }
4960
4961 /* Second, try to find a 16-bit replicated constant that can
4962 leave three of the bytes clear. If b2 or b4 is already
4963 zero, then we can. If the 8-bit from above would not
4964 clear b2 anyway, then we still win. */
4965 else if (b1 == b3 && (!b2 || !b4
4966 || (remainder & 0x00ff0000 & ~result)))
4967 {
4968 result = remainder & 0xff00ff00;
4969 i = 24;
4970 }
4971 }
4972 else if (loc > 16)
4973 {
4974 /* The 8-bit immediate already found clears b2 (and maybe b3)
4975 and we don't get here unless b1 is already clear, but it will
4976 leave b4 unchanged. */
4977
4978 /* If we can clear b2 and b4 at once, then we win, since the
4979 8-bits couldn't possibly reach that far. */
4980 if (b2 == b4)
4981 {
4982 result = remainder & 0x00ff00ff;
4983 i = 16;
4984 }
4985 }
4986 }
4987
4988 return_sequence->i[insns++] = result;
4989 remainder &= ~result;
4990
4991 if (code == SET || code == MINUS)
4992 code = PLUS;
4993 }
4994 while (remainder);
4995
4996 return insns;
4997 }
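/* Background sketch (illustration only; the compiler uses const_ok_for_arm
   and friends for this): an ARM-mode data-processing immediate is an 8-bit
   value rotated right by an even amount, which is why the loop above steps
   in units of 2 bits and grabs 8 bits at a time.  A minimal stand-alone
   check of that encoding rule, under that assumption, would be:

	static bool
	is_arm_rotated_immediate (unsigned int x)
	{
	  for (int r = 0; r < 32; r += 2)
	    {
	      /* Rotate X left by R; if the result fits in 8 bits then X
		 is an 8-bit value rotated right by R.  */
	      unsigned int v = r ? ((x << r) | (x >> (32 - r))) : x;
	      if ((v & ~0xffu) == 0)
		return true;
	    }
	  return false;
	}

   For example 0x000000ff, 0x0000ff00 and 0xff000000 all fit in one insn,
   while 0x00ff00ff and 0x0001fe00 do not (the latter would need an odd
   rotation), so they must be split into several immediates as above.  */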
4998
4999 /* Emit an instruction with the indicated PATTERN. If COND is
5000 non-NULL, conditionalize the execution of the instruction on COND
5001 being true. */
5002
5003 static void
5004 emit_constant_insn (rtx cond, rtx pattern)
5005 {
5006 if (cond)
5007 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5008 emit_insn (pattern);
5009 }
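/* For example (illustrative), if COND is a comparison such as
   (eq (reg CC) (const_int 0)), wrapping the SET in a COND_EXEC makes the
   final output a predicated instruction -- "moveq r0, #10" rather than
   "mov r0, #10" -- so every insn emitted by arm_gen_constant below
   inherits the caller's condition.  */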
5010
5011 /* As above, but extra parameter GENERATE which, if clear, suppresses
5012 RTL generation. */
5013
5014 static int
5015 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5016 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5017 int subtargets, int generate)
5018 {
5019 int can_invert = 0;
5020 int can_negate = 0;
5021 int final_invert = 0;
5022 int i;
5023 int set_sign_bit_copies = 0;
5024 int clear_sign_bit_copies = 0;
5025 int clear_zero_bit_copies = 0;
5026 int set_zero_bit_copies = 0;
5027 int insns = 0, neg_insns, inv_insns;
5028 unsigned HOST_WIDE_INT temp1, temp2;
5029 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5030 struct four_ints *immediates;
5031 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5032
5033 /* Find out which operations are safe for a given CODE. Also do a quick
5034 check for degenerate cases; these can occur when DImode operations
5035 are split. */
5036 switch (code)
5037 {
5038 case SET:
5039 can_invert = 1;
5040 break;
5041
5042 case PLUS:
5043 can_negate = 1;
5044 break;
5045
5046 case IOR:
5047 if (remainder == 0xffffffff)
5048 {
5049 if (generate)
5050 emit_constant_insn (cond,
5051 gen_rtx_SET (target,
5052 GEN_INT (ARM_SIGN_EXTEND (val))));
5053 return 1;
5054 }
5055
5056 if (remainder == 0)
5057 {
5058 if (reload_completed && rtx_equal_p (target, source))
5059 return 0;
5060
5061 if (generate)
5062 emit_constant_insn (cond, gen_rtx_SET (target, source));
5063 return 1;
5064 }
5065 break;
5066
5067 case AND:
5068 if (remainder == 0)
5069 {
5070 if (generate)
5071 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5072 return 1;
5073 }
5074 if (remainder == 0xffffffff)
5075 {
5076 if (reload_completed && rtx_equal_p (target, source))
5077 return 0;
5078 if (generate)
5079 emit_constant_insn (cond, gen_rtx_SET (target, source));
5080 return 1;
5081 }
5082 can_invert = 1;
5083 break;
5084
5085 case XOR:
5086 if (remainder == 0)
5087 {
5088 if (reload_completed && rtx_equal_p (target, source))
5089 return 0;
5090 if (generate)
5091 emit_constant_insn (cond, gen_rtx_SET (target, source));
5092 return 1;
5093 }
5094
5095 if (remainder == 0xffffffff)
5096 {
5097 if (generate)
5098 emit_constant_insn (cond,
5099 gen_rtx_SET (target,
5100 gen_rtx_NOT (mode, source)));
5101 return 1;
5102 }
5103 final_invert = 1;
5104 break;
5105
5106 case MINUS:
5107 /* We treat MINUS as (val - source), since (source - val) is always
5108 passed as (source + (-val)). */
5109 if (remainder == 0)
5110 {
5111 if (generate)
5112 emit_constant_insn (cond,
5113 gen_rtx_SET (target,
5114 gen_rtx_NEG (mode, source)));
5115 return 1;
5116 }
5117 if (const_ok_for_arm (val))
5118 {
5119 if (generate)
5120 emit_constant_insn (cond,
5121 gen_rtx_SET (target,
5122 gen_rtx_MINUS (mode, GEN_INT (val),
5123 source)));
5124 return 1;
5125 }
5126
5127 break;
5128
5129 default:
5130 gcc_unreachable ();
5131 }
5132
5133 /* If we can do it in one insn get out quickly. */
5134 if (const_ok_for_op (val, code))
5135 {
5136 if (generate)
5137 emit_constant_insn (cond,
5138 gen_rtx_SET (target,
5139 (source
5140 ? gen_rtx_fmt_ee (code, mode, source,
5141 GEN_INT (val))
5142 : GEN_INT (val))));
5143 return 1;
5144 }
5145
5146 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5147 insn. */
5148 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5149 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5150 {
5151 if (generate)
5152 {
5153 if (mode == SImode && i == 16)
5154 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5155 smaller insn. */
5156 emit_constant_insn (cond,
5157 gen_zero_extendhisi2
5158 (target, gen_lowpart (HImode, source)));
5159 else
5160 /* Extz only supports SImode, but we can coerce the operands
5161 into that mode. */
5162 emit_constant_insn (cond,
5163 gen_extzv_t2 (gen_lowpart (SImode, target),
5164 gen_lowpart (SImode, source),
5165 GEN_INT (i), const0_rtx));
5166 }
5167
5168 return 1;
5169 }
5170
5171 /* Calculate a few attributes that may be useful for specific
5172 optimizations. */
5173 /* Count number of leading zeros. */
5174 for (i = 31; i >= 0; i--)
5175 {
5176 if ((remainder & (1 << i)) == 0)
5177 clear_sign_bit_copies++;
5178 else
5179 break;
5180 }
5181
5182 /* Count number of leading 1's. */
5183 for (i = 31; i >= 0; i--)
5184 {
5185 if ((remainder & (1 << i)) != 0)
5186 set_sign_bit_copies++;
5187 else
5188 break;
5189 }
5190
5191 /* Count number of trailing zero's. */
5192 for (i = 0; i <= 31; i++)
5193 {
5194 if ((remainder & (1 << i)) == 0)
5195 clear_zero_bit_copies++;
5196 else
5197 break;
5198 }
5199
5200 /* Count number of trailing 1's. */
5201 for (i = 0; i <= 31; i++)
5202 {
5203 if ((remainder & (1 << i)) != 0)
5204 set_zero_bit_copies++;
5205 else
5206 break;
5207 }
5208
5209 switch (code)
5210 {
5211 case SET:
5212 /* See if we can do this by sign_extending a constant that is known
5213 to be negative. This is a good way of doing it, since the shift
5214 may well merge into a subsequent insn. */
5215 if (set_sign_bit_copies > 1)
5216 {
5217 if (const_ok_for_arm
5218 (temp1 = ARM_SIGN_EXTEND (remainder
5219 << (set_sign_bit_copies - 1))))
5220 {
5221 if (generate)
5222 {
5223 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5224 emit_constant_insn (cond,
5225 gen_rtx_SET (new_src, GEN_INT (temp1)));
5226 emit_constant_insn (cond,
5227 gen_ashrsi3 (target, new_src,
5228 GEN_INT (set_sign_bit_copies - 1)));
5229 }
5230 return 2;
5231 }
5232 /* For an inverted constant, we will need to set the low bits,
5233 these will be shifted out of harm's way. */
5234 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5235 if (const_ok_for_arm (~temp1))
5236 {
5237 if (generate)
5238 {
5239 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5240 emit_constant_insn (cond,
5241 gen_rtx_SET (new_src, GEN_INT (temp1)));
5242 emit_constant_insn (cond,
5243 gen_ashrsi3 (target, new_src,
5244 GEN_INT (set_sign_bit_copies - 1)));
5245 }
5246 return 2;
5247 }
5248 }
5249
5250 /* See if we can calculate the value as the difference between two
5251 valid immediates. */
5252 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5253 {
5254 int topshift = clear_sign_bit_copies & ~1;
5255
5256 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5257 & (0xff000000 >> topshift));
5258
5259 /* If temp1 is zero, then that means the 9 most significant
5260 bits of remainder were 1 and we've caused it to overflow.
5261 When topshift is 0 we don't need to do anything since we
5262 can borrow from 'bit 32'. */
5263 if (temp1 == 0 && topshift != 0)
5264 temp1 = 0x80000000 >> (topshift - 1);
5265
5266 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5267
5268 if (const_ok_for_arm (temp2))
5269 {
5270 if (generate)
5271 {
5272 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5273 emit_constant_insn (cond,
5274 gen_rtx_SET (new_src, GEN_INT (temp1)));
5275 emit_constant_insn (cond,
5276 gen_addsi3 (target, new_src,
5277 GEN_INT (-temp2)));
5278 }
5279
5280 return 2;
5281 }
5282 }
5283
5284 /* See if we can generate this by setting the bottom (or the top)
5285 16 bits, and then shifting these into the other half of the
5286 word. We only look for the simplest cases, to do more would cost
5287 too much. Be careful, however, not to generate this when the
5288 alternative would take fewer insns. */
5289 if (val & 0xffff0000)
5290 {
5291 temp1 = remainder & 0xffff0000;
5292 temp2 = remainder & 0x0000ffff;
5293
5294 /* Overlaps outside this range are best done using other methods. */
5295 for (i = 9; i < 24; i++)
5296 {
5297 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5298 && !const_ok_for_arm (temp2))
5299 {
5300 rtx new_src = (subtargets
5301 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5302 : target);
5303 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5304 source, subtargets, generate);
5305 source = new_src;
5306 if (generate)
5307 emit_constant_insn
5308 (cond,
5309 gen_rtx_SET
5310 (target,
5311 gen_rtx_IOR (mode,
5312 gen_rtx_ASHIFT (mode, source,
5313 GEN_INT (i)),
5314 source)));
5315 return insns + 1;
5316 }
5317 }
5318
5319 /* Don't duplicate cases already considered. */
5320 for (i = 17; i < 24; i++)
5321 {
5322 if (((temp1 | (temp1 >> i)) == remainder)
5323 && !const_ok_for_arm (temp1))
5324 {
5325 rtx new_src = (subtargets
5326 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5327 : target);
5328 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5329 source, subtargets, generate);
5330 source = new_src;
5331 if (generate)
5332 emit_constant_insn
5333 (cond,
5334 gen_rtx_SET (target,
5335 gen_rtx_IOR
5336 (mode,
5337 gen_rtx_LSHIFTRT (mode, source,
5338 GEN_INT (i)),
5339 source)));
5340 return insns + 1;
5341 }
5342 }
5343 }
5344 break;
5345
5346 case IOR:
5347 case XOR:
5348 /* If we have IOR or XOR, and the constant can be loaded in a
5349 single instruction, and we can find a temporary to put it in,
5350 then this can be done in two instructions instead of 3-4. */
5351 if (subtargets
5352 /* TARGET can't be NULL if SUBTARGETS is 0. */
5353 || (reload_completed && !reg_mentioned_p (target, source)))
5354 {
5355 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360
5361 emit_constant_insn (cond,
5362 gen_rtx_SET (sub, GEN_INT (val)));
5363 emit_constant_insn (cond,
5364 gen_rtx_SET (target,
5365 gen_rtx_fmt_ee (code, mode,
5366 source, sub)));
5367 }
5368 return 2;
5369 }
5370 }
5371
5372 if (code == XOR)
5373 break;
5374
5375 /* Convert
5376 x = y | constant (where the constant has set_sign_bit_copies leading 1s
5377 and the remaining bits 0, e.g. 0xfff00000) to
5378 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
5379
5380 This can be done in 2 instructions by using shifts with mov or mvn.
5381 e.g. for
5382 x = x | 0xfff00000;
5383 we generate:
5384 mvn r0, r0, asl #12
5385 mvn r0, r0, lsr #12 */
5386 if (set_sign_bit_copies > 8
5387 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5388 {
5389 if (generate)
5390 {
5391 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5392 rtx shift = GEN_INT (set_sign_bit_copies);
5393
5394 emit_constant_insn
5395 (cond,
5396 gen_rtx_SET (sub,
5397 gen_rtx_NOT (mode,
5398 gen_rtx_ASHIFT (mode,
5399 source,
5400 shift))));
5401 emit_constant_insn
5402 (cond,
5403 gen_rtx_SET (target,
5404 gen_rtx_NOT (mode,
5405 gen_rtx_LSHIFTRT (mode, sub,
5406 shift))));
5407 }
5408 return 2;
5409 }
5410
5411 /* Convert
5412 x = y | constant (which has set_zero_bit_copies trailing ones)
5413 to
5414 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5415
5416 For example, r0 = r0 | 0xfff generates
5417 mvn r0, r0, lsr #12
5418 mvn r0, r0, asl #12
5419
5420 */
5421 if (set_zero_bit_copies > 8
5422 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5423 {
5424 if (generate)
5425 {
5426 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5427 rtx shift = GEN_INT (set_zero_bit_copies);
5428
5429 emit_constant_insn
5430 (cond,
5431 gen_rtx_SET (sub,
5432 gen_rtx_NOT (mode,
5433 gen_rtx_LSHIFTRT (mode,
5434 source,
5435 shift))));
5436 emit_constant_insn
5437 (cond,
5438 gen_rtx_SET (target,
5439 gen_rtx_NOT (mode,
5440 gen_rtx_ASHIFT (mode, sub,
5441 shift))));
5442 }
5443 return 2;
5444 }
5445
5446 /* This will never be reached for Thumb2 because orn is a valid
5447 instruction. This is for Thumb1 and the ARM 32 bit cases.
5448
5449 x = y | constant (such that ~constant is a valid constant)
5450 Transform this to
5451 x = ~(~y & ~constant).
5452 */
5453 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5454 {
5455 if (generate)
5456 {
5457 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5458 emit_constant_insn (cond,
5459 gen_rtx_SET (sub,
5460 gen_rtx_NOT (mode, source)));
5461 source = sub;
5462 if (subtargets)
5463 sub = gen_reg_rtx (mode);
5464 emit_constant_insn (cond,
5465 gen_rtx_SET (sub,
5466 gen_rtx_AND (mode, source,
5467 GEN_INT (temp1))));
5468 emit_constant_insn (cond,
5469 gen_rtx_SET (target,
5470 gen_rtx_NOT (mode, sub)));
5471 }
5472 return 3;
5473 }
5474 break;
5475
5476 case AND:
5477 /* See if two shifts will do 2 or more insn's worth of work. */
5478 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5479 {
5480 HOST_WIDE_INT shift_mask = ((0xffffffff
5481 << (32 - clear_sign_bit_copies))
5482 & 0xffffffff);
5483
5484 if ((remainder | shift_mask) != 0xffffffff)
5485 {
5486 HOST_WIDE_INT new_val
5487 = ARM_SIGN_EXTEND (remainder | shift_mask);
5488
5489 if (generate)
5490 {
5491 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5492 insns = arm_gen_constant (AND, SImode, cond, new_val,
5493 new_src, source, subtargets, 1);
5494 source = new_src;
5495 }
5496 else
5497 {
5498 rtx targ = subtargets ? NULL_RTX : target;
5499 insns = arm_gen_constant (AND, mode, cond, new_val,
5500 targ, source, subtargets, 0);
5501 }
5502 }
5503
5504 if (generate)
5505 {
5506 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5507 rtx shift = GEN_INT (clear_sign_bit_copies);
5508
5509 emit_insn (gen_ashlsi3 (new_src, source, shift));
5510 emit_insn (gen_lshrsi3 (target, new_src, shift));
5511 }
5512
5513 return insns + 2;
5514 }
5515
5516 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5517 {
5518 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5519
5520 if ((remainder | shift_mask) != 0xffffffff)
5521 {
5522 HOST_WIDE_INT new_val
5523 = ARM_SIGN_EXTEND (remainder | shift_mask);
5524 if (generate)
5525 {
5526 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5527
5528 insns = arm_gen_constant (AND, mode, cond, new_val,
5529 new_src, source, subtargets, 1);
5530 source = new_src;
5531 }
5532 else
5533 {
5534 rtx targ = subtargets ? NULL_RTX : target;
5535
5536 insns = arm_gen_constant (AND, mode, cond, new_val,
5537 targ, source, subtargets, 0);
5538 }
5539 }
5540
5541 if (generate)
5542 {
5543 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5544 rtx shift = GEN_INT (clear_zero_bit_copies);
5545
5546 emit_insn (gen_lshrsi3 (new_src, source, shift));
5547 emit_insn (gen_ashlsi3 (target, new_src, shift));
5548 }
5549
5550 return insns + 2;
5551 }
5552
5553 break;
5554
5555 default:
5556 break;
5557 }
5558
5559 /* Calculate what the instruction sequences would be if we generated it
5560 normally, negated, or inverted. */
5561 if (code == AND)
5562 /* AND cannot be split into multiple insns, so invert and use BIC. */
5563 insns = 99;
5564 else
5565 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5566
5567 if (can_negate)
5568 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5569 &neg_immediates);
5570 else
5571 neg_insns = 99;
5572
5573 if (can_invert || final_invert)
5574 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5575 &inv_immediates);
5576 else
5577 inv_insns = 99;
5578
5579 immediates = &pos_immediates;
5580
5581 /* Is the negated immediate sequence more efficient? */
5582 if (neg_insns < insns && neg_insns <= inv_insns)
5583 {
5584 insns = neg_insns;
5585 immediates = &neg_immediates;
5586 }
5587 else
5588 can_negate = 0;
5589
5590 /* Is the inverted immediate sequence more efficient?
5591 We must allow for an extra NOT instruction for XOR operations, although
5592 there is some chance that the final 'mvn' will get optimized later. */
5593 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5594 {
5595 insns = inv_insns;
5596 immediates = &inv_immediates;
5597 }
5598 else
5599 {
5600 can_invert = 0;
5601 final_invert = 0;
5602 }
5603
5604 /* Now output the chosen sequence as instructions. */
5605 if (generate)
5606 {
5607 for (i = 0; i < insns; i++)
5608 {
5609 rtx new_src, temp1_rtx;
5610
5611 temp1 = immediates->i[i];
5612
5613 if (code == SET || code == MINUS)
5614 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5615 else if ((final_invert || i < (insns - 1)) && subtargets)
5616 new_src = gen_reg_rtx (mode);
5617 else
5618 new_src = target;
5619
5620 if (can_invert)
5621 temp1 = ~temp1;
5622 else if (can_negate)
5623 temp1 = -temp1;
5624
5625 temp1 = trunc_int_for_mode (temp1, mode);
5626 temp1_rtx = GEN_INT (temp1);
5627
5628 if (code == SET)
5629 ;
5630 else if (code == MINUS)
5631 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5632 else
5633 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5634
5635 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5636 source = new_src;
5637
5638 if (code == SET)
5639 {
5640 can_negate = can_invert;
5641 can_invert = 0;
5642 code = PLUS;
5643 }
5644 else if (code == MINUS)
5645 code = PLUS;
5646 }
5647 }
5648
5649 if (final_invert)
5650 {
5651 if (generate)
5652 emit_constant_insn (cond, gen_rtx_SET (target,
5653 gen_rtx_NOT (mode, source)));
5654 insns++;
5655 }
5656
5657 return insns;
5658 }
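/* Illustrative note on the AND handling above: when the inverted constant
   is a single valid immediate -- e.g. a mask of 0xffffff00, whose inverse
   is 0xff -- one instruction suffices and the ARM "and" pattern prints it
   as "bic rd, rn, #255".  When it is not, the forced inverted sequence
   produces a chain of BICs, e.g. (sketch) for a mask of 0x00ffff00:

	bic	rd, rn, #0xff000000
	bic	rd, rd, #0x000000ff

   which is why the positive form is priced at 99 "insns" above.  */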
5659
5660 /* Return TRUE if op is a constant where both the low and top words are
5661 suitable for RSB/RSC instructions. This is never true for Thumb, since
5662 we do not have RSC in that case. */
5663 static bool
5664 arm_const_double_prefer_rsbs_rsc (rtx op)
5665 {
5666 /* Thumb lacks RSC, so we never prefer that sequence. */
5667 if (TARGET_THUMB || !CONST_INT_P (op))
5668 return false;
5669 HOST_WIDE_INT hi, lo;
5670 lo = UINTVAL (op) & 0xffffffffULL;
5671 hi = UINTVAL (op) >> 32;
5672 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5673 }
5674
5675 /* Canonicalize a comparison so that we are more likely to recognize it.
5676 This can be done for a few constant compares, where we can make the
5677 immediate value easier to load. */
5678
5679 static void
5680 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5681 bool op0_preserve_value)
5682 {
5683 machine_mode mode;
5684 unsigned HOST_WIDE_INT i, maxval;
5685
5686 mode = GET_MODE (*op0);
5687 if (mode == VOIDmode)
5688 mode = GET_MODE (*op1);
5689
5690 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5691
5692 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5693 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5694 either reversed or (for constant OP1) adjusted to GE/LT.
5695 Similarly for GTU/LEU in Thumb mode. */
5696 if (mode == DImode)
5697 {
5698
5699 if (*code == GT || *code == LE
5700 || *code == GTU || *code == LEU)
5701 {
5702 /* Missing comparison. First try to use an available
5703 comparison. */
5704 if (CONST_INT_P (*op1))
5705 {
5706 i = INTVAL (*op1);
5707 switch (*code)
5708 {
5709 case GT:
5710 case LE:
5711 if (i != maxval)
5712 {
5713 /* Try to convert to GE/LT, unless that would be more
5714 expensive. */
5715 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5716 && arm_const_double_prefer_rsbs_rsc (*op1))
5717 return;
5718 *op1 = GEN_INT (i + 1);
5719 *code = *code == GT ? GE : LT;
5720 }
5721 else
5722 {
5723 /* GT maxval is always false, LE maxval is always true.
5724 We can't fold that away here as we must make a
5725 comparison, but we can fold them to comparisons
5726 with the same result that can be handled:
5727 op0 GT maxval -> op0 LT minval
5728 op0 LE maxval -> op0 GE minval
5729 where minval = (-maxval - 1). */
5730 *op1 = GEN_INT (-maxval - 1);
5731 *code = *code == GT ? LT : GE;
5732 }
5733 return;
5734
5735 case GTU:
5736 case LEU:
5737 if (i != ~((unsigned HOST_WIDE_INT) 0))
5738 {
5739 /* Try to convert to GEU/LTU, unless that would
5740 be more expensive. */
5741 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5742 && arm_const_double_prefer_rsbs_rsc (*op1))
5743 return;
5744 *op1 = GEN_INT (i + 1);
5745 *code = *code == GTU ? GEU : LTU;
5746 }
5747 else
5748 {
5749 /* GTU ~0 is always false, LEU ~0 is always true.
5750 We can't fold that away here as we must make a
5751 comparison, but we can fold them to comparisons
5752 with the same result that can be handled:
5753 op0 GTU ~0 -> op0 LTU 0
5754 op0 LEU ~0 -> op0 GEU 0. */
5755 *op1 = const0_rtx;
5756 *code = *code == GTU ? LTU : GEU;
5757 }
5758 return;
5759
5760 default:
5761 gcc_unreachable ();
5762 }
5763 }
5764
5765 if (!op0_preserve_value)
5766 {
5767 std::swap (*op0, *op1);
5768 *code = (int)swap_condition ((enum rtx_code)*code);
5769 }
5770 }
5771 return;
5772 }
5773
5774 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5775 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5776 to facilitate possible combining with a cmp into 'ands'. */
5777 if (mode == SImode
5778 && GET_CODE (*op0) == ZERO_EXTEND
5779 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5780 && GET_MODE (XEXP (*op0, 0)) == QImode
5781 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5782 && subreg_lowpart_p (XEXP (*op0, 0))
5783 && *op1 == const0_rtx)
5784 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5785 GEN_INT (255));
5786
5787 /* Comparisons smaller than DImode. Only adjust comparisons against
5788 an out-of-range constant. */
5789 if (!CONST_INT_P (*op1)
5790 || const_ok_for_arm (INTVAL (*op1))
5791 || const_ok_for_arm (- INTVAL (*op1)))
5792 return;
5793
5794 i = INTVAL (*op1);
5795
5796 switch (*code)
5797 {
5798 case EQ:
5799 case NE:
5800 return;
5801
5802 case GT:
5803 case LE:
5804 if (i != maxval
5805 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5806 {
5807 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5808 *code = *code == GT ? GE : LT;
5809 return;
5810 }
5811 break;
5812
5813 case GE:
5814 case LT:
5815 if (i != ~maxval
5816 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5817 {
5818 *op1 = GEN_INT (i - 1);
5819 *code = *code == GE ? GT : LE;
5820 return;
5821 }
5822 break;
5823
5824 case GTU:
5825 case LEU:
5826 if (i != ~((unsigned HOST_WIDE_INT) 0)
5827 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5828 {
5829 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5830 *code = *code == GTU ? GEU : LTU;
5831 return;
5832 }
5833 break;
5834
5835 case GEU:
5836 case LTU:
5837 if (i != 0
5838 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5839 {
5840 *op1 = GEN_INT (i - 1);
5841 *code = *code == GEU ? GTU : LEU;
5842 return;
5843 }
5844 break;
5845
5846 default:
5847 gcc_unreachable ();
5848 }
5849 }
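/* Worked example (illustrative): for a signed SImode comparison
   "x > 0x00ffffff", neither 0x00ffffff nor its negation is a valid ARM
   immediate, but 0x01000000 is, so the GT/LE case above rewrites the test
   as "x >= 0x01000000" and the comparison can then be emitted as

	cmp	rX, #16777216	@ 0x01000000

   with a GE condition, which is equivalent for all 32-bit integer x.  */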
5850
5851
5852 /* Define how to find the value returned by a function. */
5853
5854 static rtx
5855 arm_function_value(const_tree type, const_tree func,
5856 bool outgoing ATTRIBUTE_UNUSED)
5857 {
5858 machine_mode mode;
5859 int unsignedp ATTRIBUTE_UNUSED;
5860 rtx r ATTRIBUTE_UNUSED;
5861
5862 mode = TYPE_MODE (type);
5863
5864 if (TARGET_AAPCS_BASED)
5865 return aapcs_allocate_return_reg (mode, type, func);
5866
5867 /* Promote integer types. */
5868 if (INTEGRAL_TYPE_P (type))
5869 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5870
5871 /* Promote small structs returned in a register to full-word size
5872 for big-endian AAPCS. */
5873 if (arm_return_in_msb (type))
5874 {
5875 HOST_WIDE_INT size = int_size_in_bytes (type);
5876 if (size % UNITS_PER_WORD != 0)
5877 {
5878 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5879 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5880 }
5881 }
5882
5883 return arm_libcall_value_1 (mode);
5884 }
5885
5886 /* libcall hashtable helpers. */
5887
5888 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5889 {
5890 static inline hashval_t hash (const rtx_def *);
5891 static inline bool equal (const rtx_def *, const rtx_def *);
5892 static inline void remove (rtx_def *);
5893 };
5894
5895 inline bool
5896 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5897 {
5898 return rtx_equal_p (p1, p2);
5899 }
5900
5901 inline hashval_t
5902 libcall_hasher::hash (const rtx_def *p1)
5903 {
5904 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5905 }
5906
5907 typedef hash_table<libcall_hasher> libcall_table_type;
5908
5909 static void
5910 add_libcall (libcall_table_type *htab, rtx libcall)
5911 {
5912 *htab->find_slot (libcall, INSERT) = libcall;
5913 }
5914
5915 static bool
5916 arm_libcall_uses_aapcs_base (const_rtx libcall)
5917 {
5918 static bool init_done = false;
5919 static libcall_table_type *libcall_htab = NULL;
5920
5921 if (!init_done)
5922 {
5923 init_done = true;
5924
5925 libcall_htab = new libcall_table_type (31);
5926 add_libcall (libcall_htab,
5927 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5928 add_libcall (libcall_htab,
5929 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5930 add_libcall (libcall_htab,
5931 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5932 add_libcall (libcall_htab,
5933 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5934
5935 add_libcall (libcall_htab,
5936 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5937 add_libcall (libcall_htab,
5938 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5939 add_libcall (libcall_htab,
5940 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5941 add_libcall (libcall_htab,
5942 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5943
5944 add_libcall (libcall_htab,
5945 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5946 add_libcall (libcall_htab,
5947 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5948 add_libcall (libcall_htab,
5949 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5950 add_libcall (libcall_htab,
5951 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5952 add_libcall (libcall_htab,
5953 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5954 add_libcall (libcall_htab,
5955 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5956 add_libcall (libcall_htab,
5957 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5958 add_libcall (libcall_htab,
5959 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5960 add_libcall (libcall_htab,
5961 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5962 add_libcall (libcall_htab,
5963 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5964
5965 /* Values from double-precision helper functions are returned in core
5966 registers if the selected core only supports single-precision
5967 arithmetic, even if we are using the hard-float ABI. The same is
5968 true for single-precision helpers except in case of MVE, because in
5969 MVE we will be using the hard-float ABI on a CPU which doesn't support
5970 single-precision operations in hardware. In MVE the following check
5971 enables use of emulation for the single-precision arithmetic
5972 operations. */
5973 if (TARGET_HAVE_MVE)
5974 {
5975 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5976 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5977 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5978 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5979 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5980 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5981 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5982 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5983 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5984 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5985 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5986 }
5987 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5988 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5989 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5990 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5991 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5992 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5993 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5994 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5995 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5996 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5997 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5998 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5999 SFmode));
6000 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6001 DFmode));
6002 add_libcall (libcall_htab,
6003 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6004 }
6005
6006 return libcall && libcall_htab->find (libcall) != NULL;
6007 }
6008
6009 static rtx
6010 arm_libcall_value_1 (machine_mode mode)
6011 {
6012 if (TARGET_AAPCS_BASED)
6013 return aapcs_libcall_value (mode);
6014 else if (TARGET_IWMMXT_ABI
6015 && arm_vector_mode_supported_p (mode))
6016 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6017 else
6018 return gen_rtx_REG (mode, ARG_REGISTER (1));
6019 }
6020
6021 /* Define how to find the value returned by a library function
6022 assuming the value has mode MODE. */
6023
6024 static rtx
6025 arm_libcall_value (machine_mode mode, const_rtx libcall)
6026 {
6027 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6028 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6029 {
6030 /* The following libcalls return their result in integer registers,
6031 even though they return a floating point value. */
6032 if (arm_libcall_uses_aapcs_base (libcall))
6033 return gen_rtx_REG (mode, ARG_REGISTER(1));
6034
6035 }
6036
6037 return arm_libcall_value_1 (mode);
6038 }
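/* For instance (illustrative), the RTABI floating-point helpers recorded
   in arm_libcall_uses_aapcs_base -- e.g. the SFmode to DFmode extension,
   __aeabi_f2d on EABI targets -- return their result in r0/r1 rather than
   in d0 even when the program otherwise uses the VFP variant of the AAPCS;
   that is the case handled by the ARG_REGISTER path above.  */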
6039
6040 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6041
6042 static bool
6043 arm_function_value_regno_p (const unsigned int regno)
6044 {
6045 if (regno == ARG_REGISTER (1)
6046 || (TARGET_32BIT
6047 && TARGET_AAPCS_BASED
6048 && TARGET_HARD_FLOAT
6049 && regno == FIRST_VFP_REGNUM)
6050 || (TARGET_IWMMXT_ABI
6051 && regno == FIRST_IWMMXT_REGNUM))
6052 return true;
6053
6054 return false;
6055 }
6056
6057 /* Determine the amount of memory needed to store the possible return
6058 registers of an untyped call. */
6059 int
6060 arm_apply_result_size (void)
6061 {
6062 int size = 16;
6063
6064 if (TARGET_32BIT)
6065 {
6066 if (TARGET_HARD_FLOAT_ABI)
6067 size += 32;
6068 if (TARGET_IWMMXT_ABI)
6069 size += 8;
6070 }
6071
6072 return size;
6073 }
6074
6075 /* Decide whether TYPE should be returned in memory (true)
6076 or in a register (false). FNTYPE is the type of the function making
6077 the call. */
6078 static bool
6079 arm_return_in_memory (const_tree type, const_tree fntype)
6080 {
6081 HOST_WIDE_INT size;
6082
6083 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6084
6085 if (TARGET_AAPCS_BASED)
6086 {
6087 /* Simple, non-aggregate types (i.e. not including vectors and
6088 complex) are always returned in a register (or registers).
6089 We don't care about which register here, so we can short-cut
6090 some of the detail. */
6091 if (!AGGREGATE_TYPE_P (type)
6092 && TREE_CODE (type) != VECTOR_TYPE
6093 && TREE_CODE (type) != COMPLEX_TYPE)
6094 return false;
6095
6096 /* Any return value that is no larger than one word can be
6097 returned in r0. */
6098 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6099 return false;
6100
6101 /* Check any available co-processors to see if they accept the
6102 type as a register candidate (VFP, for example, can return
6103 some aggregates in consecutive registers). These aren't
6104 available if the call is variadic. */
6105 if (aapcs_select_return_coproc (type, fntype) >= 0)
6106 return false;
6107
6108 /* Vector values should be returned using ARM registers, not
6109 memory (unless they're over 16 bytes, which will break since
6110 we only have four call-clobbered registers to play with). */
6111 if (TREE_CODE (type) == VECTOR_TYPE)
6112 return (size < 0 || size > (4 * UNITS_PER_WORD));
6113
6114 /* The rest go in memory. */
6115 return true;
6116 }
6117
6118 if (TREE_CODE (type) == VECTOR_TYPE)
6119 return (size < 0 || size > (4 * UNITS_PER_WORD));
6120
6121 if (!AGGREGATE_TYPE_P (type) &&
6122 (TREE_CODE (type) != VECTOR_TYPE))
6123 /* All simple types are returned in registers. */
6124 return false;
6125
6126 if (arm_abi != ARM_ABI_APCS)
6127 {
6128 /* ATPCS and later return aggregate types in memory only if they are
6129 larger than a word (or are variable size). */
6130 return (size < 0 || size > UNITS_PER_WORD);
6131 }
6132
6133 /* For the arm-wince targets we choose to be compatible with Microsoft's
6134 ARM and Thumb compilers, which always return aggregates in memory. */
6135 #ifndef ARM_WINCE
6136 /* All structures/unions bigger than one word are returned in memory.
6137 Also catch the case where int_size_in_bytes returns -1. In this case
6138 the aggregate is either huge or of variable size, and in either case
6139 we will want to return it via memory and not in a register. */
6140 if (size < 0 || size > UNITS_PER_WORD)
6141 return true;
6142
6143 if (TREE_CODE (type) == RECORD_TYPE)
6144 {
6145 tree field;
6146
6147 /* For a struct the APCS says that we only return in a register
6148 if the type is 'integer like' and every addressable element
6149 has an offset of zero. For practical purposes this means
6150 that the structure can have at most one non bit-field element
6151 and that this element must be the first one in the structure. */
6152
6153 /* Find the first field, ignoring non FIELD_DECL things which will
6154 have been created by C++. */
6155 /* NOTE: This code is deprecated and has not been updated to handle
6156 DECL_FIELD_ABI_IGNORED. */
6157 for (field = TYPE_FIELDS (type);
6158 field && TREE_CODE (field) != FIELD_DECL;
6159 field = DECL_CHAIN (field))
6160 continue;
6161
6162 if (field == NULL)
6163 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6164
6165 /* Check that the first field is valid for returning in a register. */
6166
6167 /* ... Floats are not allowed */
6168 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6169 return true;
6170
6171 /* ... Aggregates that are not themselves valid for returning in
6172 a register are not allowed. */
6173 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6174 return true;
6175
6176 /* Now check the remaining fields, if any. Only bitfields are allowed,
6177 since they are not addressable. */
6178 for (field = DECL_CHAIN (field);
6179 field;
6180 field = DECL_CHAIN (field))
6181 {
6182 if (TREE_CODE (field) != FIELD_DECL)
6183 continue;
6184
6185 if (!DECL_BIT_FIELD_TYPE (field))
6186 return true;
6187 }
6188
6189 return false;
6190 }
6191
6192 if (TREE_CODE (type) == UNION_TYPE)
6193 {
6194 tree field;
6195
6196 /* Unions can be returned in registers if every element is
6197 integral, or can be returned in an integer register. */
6198 for (field = TYPE_FIELDS (type);
6199 field;
6200 field = DECL_CHAIN (field))
6201 {
6202 if (TREE_CODE (field) != FIELD_DECL)
6203 continue;
6204
6205 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6206 return true;
6207
6208 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6209 return true;
6210 }
6211
6212 return false;
6213 }
6214 #endif /* not ARM_WINCE */
6215
6216 /* Return all other types in memory. */
6217 return true;
6218 }
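/* Illustrative examples of the old APCS rules above (only reached when
   arm_abi == ARM_ABI_APCS and not on arm-wince), assuming 4-byte words:

	struct a { int x; };		   returned in r0
	struct b { float f; };		   first field is a float, so memory
	struct c { short lo; short hi; };  two addressable fields, so memory

   AAPCS-based targets never get here; they use the separate logic at the
   top of this function.  */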
6219
6220 const struct pcs_attribute_arg
6221 {
6222 const char *arg;
6223 enum arm_pcs value;
6224 } pcs_attribute_args[] =
6225 {
6226 {"aapcs", ARM_PCS_AAPCS},
6227 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6228 #if 0
6229 /* We could recognize these, but changes would be needed elsewhere
6230 * to implement them. */
6231 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6232 {"atpcs", ARM_PCS_ATPCS},
6233 {"apcs", ARM_PCS_APCS},
6234 #endif
6235 {NULL, ARM_PCS_UNKNOWN}
6236 };
6237
6238 static enum arm_pcs
6239 arm_pcs_from_attribute (tree attr)
6240 {
6241 const struct pcs_attribute_arg *ptr;
6242 const char *arg;
6243
6244 /* Get the value of the argument. */
6245 if (TREE_VALUE (attr) == NULL_TREE
6246 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6247 return ARM_PCS_UNKNOWN;
6248
6249 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6250
6251 /* Check it against the list of known arguments. */
6252 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6253 if (streq (arg, ptr->arg))
6254 return ptr->value;
6255
6256 /* An unrecognized PCS name. */
6257 return ARM_PCS_UNKNOWN;
6258 }
6259
6260 /* Get the PCS variant to use for this call. TYPE is the function's type
6261 specification, DECL is the specific declaration. DECL may be null if
6262 the call could be indirect or if this is a library call. */
6263 static enum arm_pcs
6264 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6265 {
6266 bool user_convention = false;
6267 enum arm_pcs user_pcs = arm_pcs_default;
6268 tree attr;
6269
6270 gcc_assert (type);
6271
6272 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6273 if (attr)
6274 {
6275 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6276 user_convention = true;
6277 }
6278
6279 if (TARGET_AAPCS_BASED)
6280 {
6281 /* Detect varargs functions. These always use the base rules
6282 (no argument is ever a candidate for a co-processor
6283 register). */
6284 bool base_rules = stdarg_p (type);
6285
6286 if (user_convention)
6287 {
6288 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6289 sorry ("non-AAPCS derived PCS variant");
6290 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6291 error ("variadic functions must use the base AAPCS variant");
6292 }
6293
6294 if (base_rules)
6295 return ARM_PCS_AAPCS;
6296 else if (user_convention)
6297 return user_pcs;
6298 #if 0
6299 /* Unfortunately, this is not safe and can lead to wrong code
6300 being generated (PR96882). Not all calls into the back-end
6301 pass the DECL, so it is unsafe to make any PCS-changing
6302 decisions based on it. In particular the RETURN_IN_MEMORY
6303 hook is only ever passed a TYPE. This needs revisiting to
6304 see if there are any partial improvements that can be
6305 re-enabled. */
6306 else if (decl && flag_unit_at_a_time)
6307 {
6308 /* Local functions never leak outside this compilation unit,
6309 so we are free to use whatever conventions are
6310 appropriate. */
6311 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6312 cgraph_node *local_info_node
6313 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6314 if (local_info_node && local_info_node->local)
6315 return ARM_PCS_AAPCS_LOCAL;
6316 }
6317 #endif
6318 }
6319 else if (user_convention && user_pcs != arm_pcs_default)
6320 sorry ("PCS variant");
6321
6322 /* For everything else we use the target's default. */
6323 return arm_pcs_default;
6324 }
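/* Illustrative use of the "pcs" attribute handled above, on an AAPCS-based
   hard-float configuration where arm_pcs_default is ARM_PCS_AAPCS_VFP:

	double f (double) __attribute__ ((pcs ("aapcs")));

   forces calls to f to use the base variant (arguments and result in core
   registers), while a variadic function always uses the base variant and
   an attribute requesting anything else is rejected above.  */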
6325
6326
6327 static void
6328 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6329 const_tree fntype ATTRIBUTE_UNUSED,
6330 rtx libcall ATTRIBUTE_UNUSED,
6331 const_tree fndecl ATTRIBUTE_UNUSED)
6332 {
6333 /* Record the unallocated VFP registers. */
6334 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6335 pcum->aapcs_vfp_reg_alloc = 0;
6336 }
6337
6338 /* Bitmasks that indicate whether earlier versions of GCC would have
6339 taken a different path through the ABI logic. This should result in
6340 a -Wpsabi warning if the earlier path led to a different ABI decision.
6341
6342 WARN_PSABI_EMPTY_CXX17_BASE
6343 Indicates that the type includes an artificial empty C++17 base field
6344 that, prior to GCC 10.1, would prevent the type from being treated as
6345 a HFA or HVA. See PR94711 for details.
6346
6347 WARN_PSABI_NO_UNIQUE_ADDRESS
6348 Indicates that the type includes an empty [[no_unique_address]] field
6349 that, prior to GCC 10.1, would prevent the type from being treated as
6350 a HFA or HVA. */
6351 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6352 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6353 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6354
6355 /* Walk down the type tree of TYPE counting consecutive base elements.
6356 If *MODEP is VOIDmode, then set it to the first valid floating point
6357 type. If a non-floating point type is found, or if a floating point
6358 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6359 otherwise return the count in the sub-tree.
6360
6361 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6362 function has changed its behavior relative to earlier versions of GCC.
6363 Normally the argument should be nonnull and point to a zero-initialized
6364 variable. The function then records whether the ABI decision might
6365 be affected by a known fix to the ABI logic, setting the associated
6366 WARN_PSABI_* bits if so.
6367
6368 When the argument is instead a null pointer, the function tries to
6369 simulate the behavior of GCC before all such ABI fixes were made.
6370 This is useful to check whether the function returns something
6371 different after the ABI fixes. */
6372 static int
6373 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6374 unsigned int *warn_psabi_flags)
6375 {
6376 machine_mode mode;
6377 HOST_WIDE_INT size;
6378
6379 switch (TREE_CODE (type))
6380 {
6381 case REAL_TYPE:
6382 mode = TYPE_MODE (type);
6383 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6384 return -1;
6385
6386 if (*modep == VOIDmode)
6387 *modep = mode;
6388
6389 if (*modep == mode)
6390 return 1;
6391
6392 break;
6393
6394 case COMPLEX_TYPE:
6395 mode = TYPE_MODE (TREE_TYPE (type));
6396 if (mode != DFmode && mode != SFmode)
6397 return -1;
6398
6399 if (*modep == VOIDmode)
6400 *modep = mode;
6401
6402 if (*modep == mode)
6403 return 2;
6404
6405 break;
6406
6407 case VECTOR_TYPE:
6408 /* Use V2SImode and V4SImode as representatives of all 64-bit
6409 and 128-bit vector types, whether or not those modes are
6410 supported with the present options. */
6411 size = int_size_in_bytes (type);
6412 switch (size)
6413 {
6414 case 8:
6415 mode = V2SImode;
6416 break;
6417 case 16:
6418 mode = V4SImode;
6419 break;
6420 default:
6421 return -1;
6422 }
6423
6424 if (*modep == VOIDmode)
6425 *modep = mode;
6426
6427 /* Vector modes are considered to be opaque: two vectors are
6428 equivalent for the purposes of being homogeneous aggregates
6429 if they are the same size. */
6430 if (*modep == mode)
6431 return 1;
6432
6433 break;
6434
6435 case ARRAY_TYPE:
6436 {
6437 int count;
6438 tree index = TYPE_DOMAIN (type);
6439
6440 /* Can't handle incomplete types nor sizes that are not
6441 fixed. */
6442 if (!COMPLETE_TYPE_P (type)
6443 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6444 return -1;
6445
6446 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6447 warn_psabi_flags);
6448 if (count == -1
6449 || !index
6450 || !TYPE_MAX_VALUE (index)
6451 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6452 || !TYPE_MIN_VALUE (index)
6453 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6454 || count < 0)
6455 return -1;
6456
6457 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6458 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6459
6460 /* There must be no padding. */
6461 if (wi::to_wide (TYPE_SIZE (type))
6462 != count * GET_MODE_BITSIZE (*modep))
6463 return -1;
6464
6465 return count;
6466 }
6467
6468 case RECORD_TYPE:
6469 {
6470 int count = 0;
6471 int sub_count;
6472 tree field;
6473
6474 /* Can't handle incomplete types nor sizes that are not
6475 fixed. */
6476 if (!COMPLETE_TYPE_P (type)
6477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6478 return -1;
6479
6480 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6481 {
6482 if (TREE_CODE (field) != FIELD_DECL)
6483 continue;
6484
6485 if (DECL_FIELD_ABI_IGNORED (field))
6486 {
6487 /* See whether this is something that earlier versions of
6488 GCC failed to ignore. */
6489 unsigned int flag;
6490 if (lookup_attribute ("no_unique_address",
6491 DECL_ATTRIBUTES (field)))
6492 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6493 else if (cxx17_empty_base_field_p (field))
6494 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6495 else
6496 /* No compatibility problem. */
6497 continue;
6498
6499 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6500 if (warn_psabi_flags)
6501 {
6502 *warn_psabi_flags |= flag;
6503 continue;
6504 }
6505 }
6506 /* A zero-width bitfield may affect layout in some
6507 circumstances, but adds no members. The determination
6508 of whether or not a type is an HFA is performed after
6509 layout is complete, so if the type still looks like an
6510 HFA afterwards, it is still classed as one. This is
6511 potentially an ABI break for the hard-float ABI. */
6512 else if (DECL_BIT_FIELD (field)
6513 && integer_zerop (DECL_SIZE (field)))
6514 {
6515 /* Prior to GCC-12 these fields were stripped early,
6516 hiding them from the back-end entirely and
6517 resulting in the correct behaviour for argument
6518 passing. Simulate that old behaviour without
6519 generating a warning. */
6520 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6521 continue;
6522 if (warn_psabi_flags)
6523 {
6524 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6525 continue;
6526 }
6527 }
6528
6529 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6530 warn_psabi_flags);
6531 if (sub_count < 0)
6532 return -1;
6533 count += sub_count;
6534 }
6535
6536 /* There must be no padding. */
6537 if (wi::to_wide (TYPE_SIZE (type))
6538 != count * GET_MODE_BITSIZE (*modep))
6539 return -1;
6540
6541 return count;
6542 }
6543
6544 case UNION_TYPE:
6545 case QUAL_UNION_TYPE:
6546 {
6547 /* These aren't very interesting except in a degenerate case. */
6548 int count = 0;
6549 int sub_count;
6550 tree field;
6551
6552 /* Can't handle incomplete types nor sizes that are not
6553 fixed. */
6554 if (!COMPLETE_TYPE_P (type)
6555 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6556 return -1;
6557
6558 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6559 {
6560 if (TREE_CODE (field) != FIELD_DECL)
6561 continue;
6562
6563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6564 warn_psabi_flags);
6565 if (sub_count < 0)
6566 return -1;
6567 count = count > sub_count ? count : sub_count;
6568 }
6569
6570 /* There must be no padding. */
6571 if (wi::to_wide (TYPE_SIZE (type))
6572 != count * GET_MODE_BITSIZE (*modep))
6573 return -1;
6574
6575 return count;
6576 }
6577
6578 default:
6579 break;
6580 }
6581
6582 return -1;
6583 }
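/* Illustrative results of the walk above (with no psabi quirks involved):

	struct { float x, y, z; }	 -> 3, *MODEP == SFmode	 (HFA)
	struct { double d[2]; }		 -> 2, *MODEP == DFmode	 (HFA)
	_Complex double			 -> 2, *MODEP == DFmode
	struct { float f; double d; }	 -> -1	(mixed element modes)
	struct { float f; int i; }	 -> -1	(non-floating member)

   A count between 1 and 4 is what later makes the type a VFP candidate in
   aapcs_vfp_is_call_or_return_candidate.  */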
6584
6585 /* Return true if PCS_VARIANT should use VFP registers. */
6586 static bool
6587 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6588 {
6589 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6590 {
6591 static bool seen_thumb1_vfp = false;
6592
6593 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6594 {
6595 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6596 /* sorry() is not immediately fatal, so only display this once. */
6597 seen_thumb1_vfp = true;
6598 }
6599
6600 return true;
6601 }
6602
6603 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6604 return false;
6605
6606 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6607 (TARGET_VFP_DOUBLE || !is_double));
6608 }
6609
6610 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6611 suitable for passing or returning in VFP registers for the PCS
6612 variant selected. If it is, then *BASE_MODE is updated to contain
6613 a machine mode describing each element of the argument's type and
6614 *COUNT to hold the number of such elements. */
6615 static bool
6616 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6617 machine_mode mode, const_tree type,
6618 machine_mode *base_mode, int *count)
6619 {
6620 machine_mode new_mode = VOIDmode;
6621
6622 /* If we have the type information, prefer that to working things
6623 out from the mode. */
6624 if (type)
6625 {
6626 unsigned int warn_psabi_flags = 0;
6627 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6628 &warn_psabi_flags);
6629 if (ag_count > 0 && ag_count <= 4)
6630 {
6631 static unsigned last_reported_type_uid;
6632 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6633 int alt;
6634 if (warn_psabi
6635 && warn_psabi_flags
6636 && uid != last_reported_type_uid
6637 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6638 != ag_count))
6639 {
6640 const char *url10
6641 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6642 const char *url12
6643 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6644 gcc_assert (alt == -1);
6645 last_reported_type_uid = uid;
6646 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6647 qualification. */
6648 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6649 inform (input_location, "parameter passing for argument of "
6650 "type %qT with %<[[no_unique_address]]%> members "
6651 "changed %{in GCC 10.1%}",
6652 TYPE_MAIN_VARIANT (type), url10);
6653 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6654 inform (input_location, "parameter passing for argument of "
6655 "type %qT when C++17 is enabled changed to match "
6656 "C++14 %{in GCC 10.1%}",
6657 TYPE_MAIN_VARIANT (type), url10);
6658 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6659 inform (input_location, "parameter passing for argument of "
6660 "type %qT changed %{in GCC 12.1%}",
6661 TYPE_MAIN_VARIANT (type), url12);
6662 }
6663 *count = ag_count;
6664 }
6665 else
6666 return false;
6667 }
6668 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6669 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6670 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6671 {
6672 *count = 1;
6673 new_mode = mode;
6674 }
6675 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6676 {
6677 *count = 2;
6678 new_mode = (mode == DCmode ? DFmode : SFmode);
6679 }
6680 else
6681 return false;
6682
6683
6684 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6685 return false;
6686
6687 *base_mode = new_mode;
6688
6689 if (TARGET_GENERAL_REGS_ONLY)
6690 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6691 type);
6692
6693 return true;
6694 }
6695
6696 static bool
6697 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6698 machine_mode mode, const_tree type)
6699 {
6700 int count ATTRIBUTE_UNUSED;
6701 machine_mode ag_mode ATTRIBUTE_UNUSED;
6702
6703 if (!use_vfp_abi (pcs_variant, false))
6704 return false;
6705 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6706 &ag_mode, &count);
6707 }
6708
6709 static bool
6710 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6711 const_tree type)
6712 {
6713 if (!use_vfp_abi (pcum->pcs_variant, false))
6714 return false;
6715
6716 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6717 &pcum->aapcs_vfp_rmode,
6718 &pcum->aapcs_vfp_rcount);
6719 }
6720
6721 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6722 for the behaviour of this function. */
6723
6724 static bool
6725 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6726 const_tree type ATTRIBUTE_UNUSED)
6727 {
6728 int rmode_size
6729 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6730 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6731 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6732 int regno;
6733
6734 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6735 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6736 {
6737 pcum->aapcs_vfp_reg_alloc = mask << regno;
6738 if (mode == BLKmode
6739 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6740 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6741 {
6742 int i;
6743 int rcount = pcum->aapcs_vfp_rcount;
6744 int rshift = shift;
6745 machine_mode rmode = pcum->aapcs_vfp_rmode;
6746 rtx par;
6747 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6748 {
6749 /* Avoid using unsupported vector modes. */
6750 if (rmode == V2SImode)
6751 rmode = DImode;
6752 else if (rmode == V4SImode)
6753 {
6754 rmode = DImode;
6755 rcount *= 2;
6756 rshift /= 2;
6757 }
6758 }
6759 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6760 for (i = 0; i < rcount; i++)
6761 {
6762 rtx tmp = gen_rtx_REG (rmode,
6763 FIRST_VFP_REGNUM + regno + i * rshift);
6764 tmp = gen_rtx_EXPR_LIST
6765 (VOIDmode, tmp,
6766 GEN_INT (i * GET_MODE_SIZE (rmode)));
6767 XVECEXP (par, 0, i) = tmp;
6768 }
6769
6770 pcum->aapcs_reg = par;
6771 }
6772 else
6773 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6774 return true;
6775 }
6776 return false;
6777 }
6778
6779 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6780 comment there for the behaviour of this function. */
6781
6782 static rtx
6783 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6784 machine_mode mode,
6785 const_tree type ATTRIBUTE_UNUSED)
6786 {
6787 if (!use_vfp_abi (pcs_variant, false))
6788 return NULL;
6789
6790 if (mode == BLKmode
6791 || (GET_MODE_CLASS (mode) == MODE_INT
6792 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6793 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6794 {
6795 int count;
6796 machine_mode ag_mode;
6797 int i;
6798 rtx par;
6799 int shift;
6800
6801 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6802 &ag_mode, &count);
6803
6804 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6805 {
6806 if (ag_mode == V2SImode)
6807 ag_mode = DImode;
6808 else if (ag_mode == V4SImode)
6809 {
6810 ag_mode = DImode;
6811 count *= 2;
6812 }
6813 }
6814 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6815 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6816 for (i = 0; i < count; i++)
6817 {
6818 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6819 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6820 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6821 XVECEXP (par, 0, i) = tmp;
6822 }
6823
6824 return par;
6825 }
6826
6827 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6828 }
6829
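/* Implement the advance field in aapcs_cp_arg_layout for VFP: mark the
   registers allocated to the current argument as used and clear the
   allocation ready for the next argument.  */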
6830 static void
6831 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6832 machine_mode mode ATTRIBUTE_UNUSED,
6833 const_tree type ATTRIBUTE_UNUSED)
6834 {
6835 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6836 pcum->aapcs_vfp_reg_alloc = 0;
6837 return;
6838 }
6839
6840 #define AAPCS_CP(X) \
6841 { \
6842 aapcs_ ## X ## _cum_init, \
6843 aapcs_ ## X ## _is_call_candidate, \
6844 aapcs_ ## X ## _allocate, \
6845 aapcs_ ## X ## _is_return_candidate, \
6846 aapcs_ ## X ## _allocate_return_reg, \
6847 aapcs_ ## X ## _advance \
6848 }
6849
6850 /* Table of co-processors that can be used to pass arguments in
6851 registers.  Ideally no argument should be a candidate for more than
6852 one co-processor table entry, but the table is processed in order
6853 and stops after the first match. If that entry then fails to put
6854 the argument into a co-processor register, the argument will go on
6855 the stack. */
6856 static struct
6857 {
6858 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6859 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6860
6861 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6862 BLKmode) is a candidate for this co-processor's registers; this
6863 function should ignore any position-dependent state in
6864 CUMULATIVE_ARGS and only use call-type dependent information. */
6865 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6866
6867 /* Return true if the argument does get a co-processor register; it
6868 should set aapcs_reg to an RTX for the allocated register, in the
6869 form required as a return value from FUNCTION_ARG.  */
6870 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6871
6872 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6873 be returned in this co-processor's registers. */
6874 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6875
6876 /* Allocate and return an RTX element to hold the return value of a call.  This
6877 routine must not fail and will only be called if is_return_candidate
6878 returned true with the same parameters. */
6879 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6880
6881 /* Finish processing this argument and prepare to start processing
6882 the next one. */
6883 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6884 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6885 {
6886 AAPCS_CP(vfp)
6887 };
6888
6889 #undef AAPCS_CP
6890
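/* Return the index of the co-processor slot that can pass an argument of
   mode MODE and type TYPE for the call described by PCUM, or -1 if no
   co-processor is a candidate.  */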
6891 static int
6892 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6893 const_tree type)
6894 {
6895 int i;
6896
6897 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6898 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6899 return i;
6900
6901 return -1;
6902 }
6903
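/* Return the index of the co-processor slot that can return a value of
   type TYPE from a function of type FNTYPE, or -1 if no co-processor is
   a candidate.  */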
6904 static int
6905 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6906 {
6907 /* We aren't passed a decl, so we can't check that a call is local.
6908 However, it isn't clear that that would be a win anyway, since it
6909 might limit some tail-calling opportunities. */
6910 enum arm_pcs pcs_variant;
6911
6912 if (fntype)
6913 {
6914 const_tree fndecl = NULL_TREE;
6915
6916 if (TREE_CODE (fntype) == FUNCTION_DECL)
6917 {
6918 fndecl = fntype;
6919 fntype = TREE_TYPE (fntype);
6920 }
6921
6922 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6923 }
6924 else
6925 pcs_variant = arm_pcs_default;
6926
6927 if (pcs_variant != ARM_PCS_AAPCS)
6928 {
6929 int i;
6930
6931 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6932 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6933 TYPE_MODE (type),
6934 type))
6935 return i;
6936 }
6937 return -1;
6938 }
6939
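/* Return an RTX describing where a value of mode MODE and type TYPE is
   returned from a call to a function of type FNTYPE under the AAPCS-based
   calling conventions.  */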
6940 static rtx
6941 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6942 const_tree fntype)
6943 {
6944 /* We aren't passed a decl, so we can't check that a call is local.
6945 However, it isn't clear that that would be a win anyway, since it
6946 might limit some tail-calling opportunities. */
6947 enum arm_pcs pcs_variant;
6948 int unsignedp ATTRIBUTE_UNUSED;
6949
6950 if (fntype)
6951 {
6952 const_tree fndecl = NULL_TREE;
6953
6954 if (TREE_CODE (fntype) == FUNCTION_DECL)
6955 {
6956 fndecl = fntype;
6957 fntype = TREE_TYPE (fntype);
6958 }
6959
6960 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6961 }
6962 else
6963 pcs_variant = arm_pcs_default;
6964
6965 /* Promote integer types. */
6966 if (type && INTEGRAL_TYPE_P (type))
6967 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6968
6969 if (pcs_variant != ARM_PCS_AAPCS)
6970 {
6971 int i;
6972
6973 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6974 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6975 type))
6976 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6977 mode, type);
6978 }
6979
6980 /* Promote small structs returned in a register to full-word size
6981 for big-endian AAPCS. */
6982 if (type && arm_return_in_msb (type))
6983 {
6984 HOST_WIDE_INT size = int_size_in_bytes (type);
6985 if (size % UNITS_PER_WORD != 0)
6986 {
6987 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6988 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6989 }
6990 }
6991
6992 return gen_rtx_REG (mode, R0_REGNUM);
6993 }
6994
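/* Return an RTX describing where a libcall result of mode MODE is
   returned under the AAPCS.  */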
6995 static rtx
6996 aapcs_libcall_value (machine_mode mode)
6997 {
6998 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6999 && GET_MODE_SIZE (mode) <= 4)
7000 mode = SImode;
7001
7002 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7003 }
7004
7005 /* Lay out a function argument using the AAPCS rules. The rule
7006 numbers referred to here are those in the AAPCS. */
7007 static void
7008 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7009 const_tree type, bool named)
7010 {
7011 int nregs, nregs2;
7012 int ncrn;
7013
7014 /* We only need to do this once per argument. */
7015 if (pcum->aapcs_arg_processed)
7016 return;
7017
7018 pcum->aapcs_arg_processed = true;
7019
7020 /* Special case: if named is false then we are handling an incoming
7021 anonymous argument which is on the stack. */
7022 if (!named)
7023 return;
7024
7025 /* Is this a potential co-processor register candidate? */
7026 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7027 {
7028 int slot = aapcs_select_call_coproc (pcum, mode, type);
7029 pcum->aapcs_cprc_slot = slot;
7030
7031 /* We don't have to apply any of the rules from part B of the
7032 preparation phase; these are handled elsewhere in the
7033 compiler. */
7034
7035 if (slot >= 0)
7036 {
7037 /* A Co-processor register candidate goes either in its own
7038 class of registers or on the stack. */
7039 if (!pcum->aapcs_cprc_failed[slot])
7040 {
7041 /* C1.cp - Try to allocate the argument to co-processor
7042 registers. */
7043 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7044 return;
7045
7046 /* C2.cp - Put the argument on the stack and note that we
7047 can't assign any more candidates in this slot. We also
7048 need to note that we have allocated stack space, so that
7049 we won't later try to split a non-cprc candidate between
7050 core registers and the stack. */
7051 pcum->aapcs_cprc_failed[slot] = true;
7052 pcum->can_split = false;
7053 }
7054
7055 /* We didn't get a register, so this argument goes on the
7056 stack. */
7057 gcc_assert (pcum->can_split == false);
7058 return;
7059 }
7060 }
7061
7062 /* C3 - For double-word aligned arguments, round the NCRN up to the
7063 next even number. */
7064 ncrn = pcum->aapcs_ncrn;
7065 if (ncrn & 1)
7066 {
7067 int res = arm_needs_doubleword_align (mode, type);
7068 /* Only warn during RTL expansion of call stmts, otherwise we would
7069 warn e.g. during gimplification even on functions that will be
7070 always inlined, and we'd warn multiple times. Don't warn when
7071 called in expand_function_start either, as we warn instead in
7072 arm_function_arg_boundary in that case. */
7073 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7074 inform (input_location, "parameter passing for argument of type "
7075 "%qT changed in GCC 7.1", type);
7076 else if (res > 0)
7077 ncrn++;
7078 }
7079
7080 nregs = ARM_NUM_REGS2 (mode, type);
7081
7082 /* Sigh, this test should really assert that nregs > 0, but a GCC
7083 extension allows empty structs and then gives them empty size; it
7084 then allows such a structure to be passed by value. For some of
7085 the code below we have to pretend that such an argument has
7086 non-zero size so that we 'locate' it correctly either in
7087 registers or on the stack. */
7088 gcc_assert (nregs >= 0);
7089
7090 nregs2 = nregs ? nregs : 1;
7091
7092 /* C4 - Argument fits entirely in core registers. */
7093 if (ncrn + nregs2 <= NUM_ARG_REGS)
7094 {
7095 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7096 pcum->aapcs_next_ncrn = ncrn + nregs;
7097 return;
7098 }
7099
7100 /* C5 - Some core registers left and there are no arguments already
7101 on the stack: split this argument between the remaining core
7102 registers and the stack. */
7103 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7104 {
7105 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7106 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7107 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7108 return;
7109 }
7110
7111 /* C6 - NCRN is set to 4. */
7112 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7113
7114 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
7115 return;
7116 }
7117
7118 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7119 for a call to a function whose data type is FNTYPE.
7120 For a library call, FNTYPE is NULL. */
7121 void
7122 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7123 rtx libname,
7124 tree fndecl ATTRIBUTE_UNUSED)
7125 {
7126 /* Long call handling. */
7127 if (fntype)
7128 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7129 else
7130 pcum->pcs_variant = arm_pcs_default;
7131
7132 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7133 {
7134 if (arm_libcall_uses_aapcs_base (libname))
7135 pcum->pcs_variant = ARM_PCS_AAPCS;
7136
7137 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7138 pcum->aapcs_reg = NULL_RTX;
7139 pcum->aapcs_partial = 0;
7140 pcum->aapcs_arg_processed = false;
7141 pcum->aapcs_cprc_slot = -1;
7142 pcum->can_split = true;
7143
7144 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7145 {
7146 int i;
7147
7148 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7149 {
7150 pcum->aapcs_cprc_failed[i] = false;
7151 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7152 }
7153 }
7154 return;
7155 }
7156
7157 /* Legacy ABIs */
7158
7159 /* On the ARM, the offset starts at 0. */
7160 pcum->nregs = 0;
7161 pcum->iwmmxt_nregs = 0;
7162 pcum->can_split = true;
7163
7164 /* Varargs vectors are treated the same as long long.
7165 named_count avoids having to change the way arm handles 'named' */
7166 pcum->named_count = 0;
7167 pcum->nargs = 0;
7168
7169 if (TARGET_REALLY_IWMMXT && fntype)
7170 {
7171 tree fn_arg;
7172
7173 for (fn_arg = TYPE_ARG_TYPES (fntype);
7174 fn_arg;
7175 fn_arg = TREE_CHAIN (fn_arg))
7176 pcum->named_count += 1;
7177
7178 if (! pcum->named_count)
7179 pcum->named_count = INT_MAX;
7180 }
7181 }
7182
7183 /* Return 2 if double word alignment is required for argument passing,
7184 but wasn't required before the fix for PR88469.
7185 Return 1 if double word alignment is required for argument passing.
7186 Return -1 if double word alignment used to be required for argument
7187 passing before PR77728 ABI fix, but is not required anymore.
7188 Return 0 if double word alignment is not required and wasn't required
7189 before either. */
7190 static int
7191 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7192 {
7193 if (!type)
7194 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7195
7196 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7197 if (!AGGREGATE_TYPE_P (type))
7198 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7199
7200 /* Array types: Use member alignment of element type. */
7201 if (TREE_CODE (type) == ARRAY_TYPE)
7202 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7203
7204 int ret = 0;
7205 int ret2 = 0;
7206 /* Record/aggregate types: Use greatest member alignment of any member.
7207
7208 Note that we explicitly consider zero-sized fields here, even though
7209 they don't map to AAPCS machine types. For example, in:
7210
7211 struct __attribute__((aligned(8))) empty {};
7212
7213 struct s {
7214 [[no_unique_address]] empty e;
7215 int x;
7216 };
7217
7218 "s" contains only one Fundamental Data Type (the int field)
7219 but gains 8-byte alignment and size thanks to "e". */
7220 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7221 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7222 {
7223 if (TREE_CODE (field) == FIELD_DECL)
7224 return 1;
7225 else
7226 /* Before PR77728 fix, we were incorrectly considering also
7227 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7228 Make sure we can warn about that with -Wpsabi. */
7229 ret = -1;
7230 }
7231 else if (TREE_CODE (field) == FIELD_DECL
7232 && DECL_BIT_FIELD_TYPE (field)
7233 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7234 ret2 = 1;
7235
7236 if (ret2)
7237 return 2;
7238
7239 return ret;
7240 }
7241
7242
7243 /* Determine where to put an argument to a function.
7244 Value is zero to push the argument on the stack,
7245 or a hard register in which to store the argument.
7246
7247 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7248 the preceding args and about the function being called.
7249 ARG is a description of the argument.
7250
7251 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7252 other arguments are passed on the stack. If (NAMED == 0) (which happens
7253 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7254 defined), say it is passed in the stack (function_prologue will
7255 indeed make it pass in the stack if necessary). */
7256
7257 static rtx
7258 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7259 {
7260 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7261 int nregs;
7262
7263 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7264 a call insn (op3 of a call_value insn). */
7265 if (arg.end_marker_p ())
7266 return const0_rtx;
7267
7268 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7269 {
7270 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7271 return pcum->aapcs_reg;
7272 }
7273
7274 /* Varargs vectors are treated the same as long long.
7275 named_count avoids having to change the way arm handles 'named' */
7276 if (TARGET_IWMMXT_ABI
7277 && arm_vector_mode_supported_p (arg.mode)
7278 && pcum->named_count > pcum->nargs + 1)
7279 {
7280 if (pcum->iwmmxt_nregs <= 9)
7281 return gen_rtx_REG (arg.mode,
7282 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7283 else
7284 {
7285 pcum->can_split = false;
7286 return NULL_RTX;
7287 }
7288 }
7289
7290 /* Put doubleword aligned quantities in even register pairs. */
7291 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7292 {
7293 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7294 if (res < 0 && warn_psabi)
7295 inform (input_location, "parameter passing for argument of type "
7296 "%qT changed in GCC 7.1", arg.type);
7297 else if (res > 0)
7298 {
7299 pcum->nregs++;
7300 if (res > 1 && warn_psabi)
7301 inform (input_location, "parameter passing for argument of type "
7302 "%qT changed in GCC 9.1", arg.type);
7303 }
7304 }
7305
7306 /* Only allow splitting an arg between regs and memory if all preceding
7307 args were allocated to regs. For args passed by reference we only count
7308 the reference pointer. */
7309 if (pcum->can_split)
7310 nregs = 1;
7311 else
7312 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7313
7314 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7315 return NULL_RTX;
7316
7317 return gen_rtx_REG (arg.mode, pcum->nregs);
7318 }
7319
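/* Return the alignment, in bits, required when passing an argument of
   mode MODE and type TYPE.  */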
7320 static unsigned int
7321 arm_function_arg_boundary (machine_mode mode, const_tree type)
7322 {
7323 if (!ARM_DOUBLEWORD_ALIGN)
7324 return PARM_BOUNDARY;
7325
7326 int res = arm_needs_doubleword_align (mode, type);
7327 if (res < 0 && warn_psabi)
7328 inform (input_location, "parameter passing for argument of type %qT "
7329 "changed in GCC 7.1", type);
7330 if (res > 1 && warn_psabi)
7331 inform (input_location, "parameter passing for argument of type "
7332 "%qT changed in GCC 9.1", type);
7333
7334 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7335 }
7336
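/* Return the number of bytes of argument ARG that are passed in core
   registers when the argument is split between registers and the stack,
   or zero if no split is needed.  */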
7337 static int
7338 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7339 {
7340 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7341 int nregs = pcum->nregs;
7342
7343 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7344 {
7345 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7346 return pcum->aapcs_partial;
7347 }
7348
7349 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7350 return 0;
7351
7352 if (NUM_ARG_REGS > nregs
7353 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7354 && pcum->can_split)
7355 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7356
7357 return 0;
7358 }
7359
7360 /* Update the data in PCUM to advance over argument ARG. */
7361
7362 static void
7363 arm_function_arg_advance (cumulative_args_t pcum_v,
7364 const function_arg_info &arg)
7365 {
7366 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7367
7368 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7369 {
7370 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7371
7372 if (pcum->aapcs_cprc_slot >= 0)
7373 {
7374 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7375 arg.type);
7376 pcum->aapcs_cprc_slot = -1;
7377 }
7378
7379 /* Generic stuff. */
7380 pcum->aapcs_arg_processed = false;
7381 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7382 pcum->aapcs_reg = NULL_RTX;
7383 pcum->aapcs_partial = 0;
7384 }
7385 else
7386 {
7387 pcum->nargs += 1;
7388 if (arm_vector_mode_supported_p (arg.mode)
7389 && pcum->named_count > pcum->nargs
7390 && TARGET_IWMMXT_ABI)
7391 pcum->iwmmxt_nregs += 1;
7392 else
7393 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7394 }
7395 }
7396
7397 /* Variable sized types are passed by reference. This is a GCC
7398 extension to the ARM ABI. */
7399
7400 static bool
7401 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7402 {
7403 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7404 }
7405 \f
7406 /* Encode the current state of the #pragma [no_]long_calls. */
7407 typedef enum
7408 {
7409 OFF, /* No #pragma [no_]long_calls is in effect. */
7410 LONG, /* #pragma long_calls is in effect. */
7411 SHORT /* #pragma no_long_calls is in effect. */
7412 } arm_pragma_enum;
7413
7414 static arm_pragma_enum arm_pragma_long_calls = OFF;
7415
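/* Pragma handlers: record which of the #pragma [no_]long_calls directives
   is currently in effect.  */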
7416 void
7417 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7418 {
7419 arm_pragma_long_calls = LONG;
7420 }
7421
7422 void
7423 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7424 {
7425 arm_pragma_long_calls = SHORT;
7426 }
7427
7428 void
7429 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7430 {
7431 arm_pragma_long_calls = OFF;
7432 }
7433 \f
7434 /* Handle an attribute requiring a FUNCTION_DECL;
7435 arguments as in struct attribute_spec.handler. */
7436 static tree
7437 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7438 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7439 {
7440 if (TREE_CODE (*node) != FUNCTION_DECL)
7441 {
7442 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7443 name);
7444 *no_add_attrs = true;
7445 }
7446
7447 return NULL_TREE;
7448 }
7449
7450 /* Handle an "interrupt" or "isr" attribute;
7451 arguments as in struct attribute_spec.handler. */
7452 static tree
7453 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7454 bool *no_add_attrs)
7455 {
7456 if (DECL_P (*node))
7457 {
7458 if (TREE_CODE (*node) != FUNCTION_DECL)
7459 {
7460 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7461 name);
7462 *no_add_attrs = true;
7463 }
7464 else if (TARGET_VFP_BASE)
7465 {
7466 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7467 name);
7468 }
7469 /* FIXME: the argument if any is checked for type attributes;
7470 should it be checked for decl ones? */
7471 }
7472 else
7473 {
7474 if (FUNC_OR_METHOD_TYPE_P (*node))
7475 {
7476 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7477 {
7478 warning (OPT_Wattributes, "%qE attribute ignored",
7479 name);
7480 *no_add_attrs = true;
7481 }
7482 }
7483 else if (TREE_CODE (*node) == POINTER_TYPE
7484 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7485 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7486 {
7487 *node = build_variant_type_copy (*node);
7488 TREE_TYPE (*node) = build_type_attribute_variant
7489 (TREE_TYPE (*node),
7490 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7491 *no_add_attrs = true;
7492 }
7493 else
7494 {
7495 /* Possibly pass this attribute on from the type to a decl. */
7496 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7497 | (int) ATTR_FLAG_FUNCTION_NEXT
7498 | (int) ATTR_FLAG_ARRAY_NEXT))
7499 {
7500 *no_add_attrs = true;
7501 return tree_cons (name, args, NULL_TREE);
7502 }
7503 else
7504 {
7505 warning (OPT_Wattributes, "%qE attribute ignored",
7506 name);
7507 }
7508 }
7509 }
7510
7511 return NULL_TREE;
7512 }
7513
7514 /* Handle a "pcs" attribute; arguments as in struct
7515 attribute_spec.handler. */
7516 static tree
7517 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7518 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7519 {
7520 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7521 {
7522 warning (OPT_Wattributes, "%qE attribute ignored", name);
7523 *no_add_attrs = true;
7524 }
7525 return NULL_TREE;
7526 }
7527
7528 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7529 /* Handle the "notshared" attribute. This attribute is another way of
7530 requesting hidden visibility. ARM's compiler supports
7531 "__declspec(notshared)"; we support the same thing via an
7532 attribute. */
7533
7534 static tree
7535 arm_handle_notshared_attribute (tree *node,
7536 tree name ATTRIBUTE_UNUSED,
7537 tree args ATTRIBUTE_UNUSED,
7538 int flags ATTRIBUTE_UNUSED,
7539 bool *no_add_attrs)
7540 {
7541 tree decl = TYPE_NAME (*node);
7542
7543 if (decl)
7544 {
7545 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7546 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7547 *no_add_attrs = false;
7548 }
7549 return NULL_TREE;
7550 }
7551 #endif
7552
7553 /* This function returns true if a function with declaration FNDECL and type
7554 FNTYPE uses the stack to pass arguments or return variables and false
7555 otherwise. This is used for functions with the attributes
7556 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7557 diagnostic messages if the stack is used. NAME is the name of the attribute
7558 used. */
7559
7560 static bool
7561 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7562 {
7563 function_args_iterator args_iter;
7564 CUMULATIVE_ARGS args_so_far_v;
7565 cumulative_args_t args_so_far;
7566 bool first_param = true;
7567 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7568
7569 /* Error out if any argument is passed on the stack. */
7570 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7571 args_so_far = pack_cumulative_args (&args_so_far_v);
7572 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7573 {
7574 rtx arg_rtx;
7575
7576 prev_arg_type = arg_type;
7577 if (VOID_TYPE_P (arg_type))
7578 continue;
7579
7580 function_arg_info arg (arg_type, /*named=*/true);
7581 if (!first_param)
7582 /* ??? We should advance after processing the argument and pass
7583 the argument we're advancing past. */
7584 arm_function_arg_advance (args_so_far, arg);
7585 arg_rtx = arm_function_arg (args_so_far, arg);
7586 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7587 {
7588 error ("%qE attribute not available to functions with arguments "
7589 "passed on the stack", name);
7590 return true;
7591 }
7592 first_param = false;
7593 }
7594
7595 /* Error out for variadic functions since we cannot control how many
7596 arguments will be passed and thus stack could be used. stdarg_p () is not
7597 used for the checking to avoid browsing arguments twice. */
7598 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7599 {
7600 error ("%qE attribute not available to functions with variable number "
7601 "of arguments", name);
7602 return true;
7603 }
7604
7605 /* Error out if return value is passed on the stack. */
7606 ret_type = TREE_TYPE (fntype);
7607 if (arm_return_in_memory (ret_type, fntype))
7608 {
7609 error ("%qE attribute not available to functions that return value on "
7610 "the stack", name);
7611 return true;
7612 }
7613 return false;
7614 }
7615
7616 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7617 function will check whether the attribute is allowed here and will add the
7618 attribute to the function declaration tree or otherwise issue a warning. */
7619
7620 static tree
7621 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7622 tree /* args */,
7623 int /* flags */,
7624 bool *no_add_attrs)
7625 {
7626 tree fndecl;
7627
7628 if (!use_cmse)
7629 {
7630 *no_add_attrs = true;
7631 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7632 "option", name);
7633 return NULL_TREE;
7634 }
7635
7636 /* Ignore attribute for function types. */
7637 if (TREE_CODE (*node) != FUNCTION_DECL)
7638 {
7639 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7640 name);
7641 *no_add_attrs = true;
7642 return NULL_TREE;
7643 }
7644
7645 fndecl = *node;
7646
7647 /* Warn for static linkage functions. */
7648 if (!TREE_PUBLIC (fndecl))
7649 {
7650 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7651 "with static linkage", name);
7652 *no_add_attrs = true;
7653 return NULL_TREE;
7654 }
7655
7656 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7657 TREE_TYPE (fndecl));
7658 return NULL_TREE;
7659 }
7660
7661
7662 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7663 function will check whether the attribute is allowed here and will add the
7664 attribute to the function type tree or otherwise issue a diagnostic. The
7665 reason we check this at declaration time is to only allow the use of the
7666 attribute with declarations of function pointers and not function
7667 declarations. This function checks NODE is of the expected type and issues
7668 diagnostics otherwise using NAME. If it is not of the expected type
7669 *NO_ADD_ATTRS will be set to true. */
7670
7671 static tree
7672 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7673 tree /* args */,
7674 int /* flags */,
7675 bool *no_add_attrs)
7676 {
7677 tree decl = NULL_TREE;
7678 tree fntype, type;
7679
7680 if (!use_cmse)
7681 {
7682 *no_add_attrs = true;
7683 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7684 "option", name);
7685 return NULL_TREE;
7686 }
7687
7688 if (DECL_P (*node))
7689 {
7690 fntype = TREE_TYPE (*node);
7691
7692 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7693 decl = *node;
7694 }
7695 else
7696 fntype = *node;
7697
7698 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7699 fntype = TREE_TYPE (fntype);
7700
7701 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7702 {
7703 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7704 "function pointer", name);
7705 *no_add_attrs = true;
7706 return NULL_TREE;
7707 }
7708
7709 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7710
7711 if (*no_add_attrs)
7712 return NULL_TREE;
7713
7714 /* Prevent trees being shared among function types with and without
7715 cmse_nonsecure_call attribute. */
7716 if (decl)
7717 {
7718 type = build_distinct_type_copy (TREE_TYPE (decl));
7719 TREE_TYPE (decl) = type;
7720 }
7721 else
7722 {
7723 type = build_distinct_type_copy (*node);
7724 *node = type;
7725 }
7726
7727 fntype = type;
7728
7729 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7730 {
7731 type = fntype;
7732 fntype = TREE_TYPE (fntype);
7733 fntype = build_distinct_type_copy (fntype);
7734 TREE_TYPE (type) = fntype;
7735 }
7736
7737 /* Construct a type attribute and add it to the function type. */
7738 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7739 TYPE_ATTRIBUTES (fntype));
7740 TYPE_ATTRIBUTES (fntype) = attrs;
7741 return NULL_TREE;
7742 }
7743
7744 /* Return 0 if the attributes for two types are incompatible, 1 if they
7745 are compatible, and 2 if they are nearly compatible (which causes a
7746 warning to be generated). */
7747 static int
7748 arm_comp_type_attributes (const_tree type1, const_tree type2)
7749 {
7750 int l1, l2, s1, s2;
7751
7752 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7753 TYPE_ATTRIBUTES (type1));
7754 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7755 TYPE_ATTRIBUTES (type2));
7756 if (bool (attrs1) != bool (attrs2))
7757 return 0;
7758 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7759 return 0;
7760
7761 /* Check for mismatch of non-default calling convention. */
7762 if (TREE_CODE (type1) != FUNCTION_TYPE)
7763 return 1;
7764
7765 /* Check for mismatched call attributes. */
7766 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7767 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7768 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7769 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7770
7771 /* Only bother to check if an attribute is defined. */
7772 if (l1 | l2 | s1 | s2)
7773 {
7774 /* If one type has an attribute, the other must have the same attribute. */
7775 if ((l1 != l2) || (s1 != s2))
7776 return 0;
7777
7778 /* Disallow mixed attributes. */
7779 if ((l1 & s2) || (l2 & s1))
7780 return 0;
7781 }
7782
7783 /* Check for mismatched ISR attribute. */
7784 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7785 if (! l1)
7786 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7787 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7788 if (! l2)
7789 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7790 if (l1 != l2)
7791 return 0;
7792
7793 l1 = lookup_attribute ("cmse_nonsecure_call",
7794 TYPE_ATTRIBUTES (type1)) != NULL;
7795 l2 = lookup_attribute ("cmse_nonsecure_call",
7796 TYPE_ATTRIBUTES (type2)) != NULL;
7797
7798 if (l1 != l2)
7799 return 0;
7800
7801 return 1;
7802 }
7803
7804 /* Assigns default attributes to newly defined type. This is used to
7805 set short_call/long_call attributes for function types of
7806 functions defined inside corresponding #pragma scopes. */
7807 static void
7808 arm_set_default_type_attributes (tree type)
7809 {
7810 /* Add __attribute__ ((long_call)) to all functions, when
7811 inside #pragma long_calls or __attribute__ ((short_call)),
7812 when inside #pragma no_long_calls. */
7813 if (FUNC_OR_METHOD_TYPE_P (type))
7814 {
7815 tree type_attr_list, attr_name;
7816 type_attr_list = TYPE_ATTRIBUTES (type);
7817
7818 if (arm_pragma_long_calls == LONG)
7819 attr_name = get_identifier ("long_call");
7820 else if (arm_pragma_long_calls == SHORT)
7821 attr_name = get_identifier ("short_call");
7822 else
7823 return;
7824
7825 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7826 TYPE_ATTRIBUTES (type) = type_attr_list;
7827 }
7828 }
7829 \f
7830 /* Return true if DECL is known to be linked into section SECTION. */
7831
7832 static bool
7833 arm_function_in_section_p (tree decl, section *section)
7834 {
7835 /* We can only be certain about the prevailing symbol definition. */
7836 if (!decl_binds_to_current_def_p (decl))
7837 return false;
7838
7839 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7840 if (!DECL_SECTION_NAME (decl))
7841 {
7842 /* Make sure that we will not create a unique section for DECL. */
7843 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7844 return false;
7845 }
7846
7847 return function_section (decl) == section;
7848 }
7849
7850 /* Return nonzero if a 32-bit "long_call" should be generated for
7851 a call from the current function to DECL. We generate a long_call
7852 if the function:
7853
7854 a.  has an __attribute__ ((long_call))
7855 or b. is within the scope of a #pragma long_calls
7856 or c. the -mlong-calls command line switch has been specified
7857
7858 However we do not generate a long call if the function:
7859
7860 d. has an __attribute__ ((short_call))
7861 or e. is inside the scope of a #pragma no_long_calls
7862 or f. is defined in the same section as the current function. */
7863
7864 bool
7865 arm_is_long_call_p (tree decl)
7866 {
7867 tree attrs;
7868
7869 if (!decl)
7870 return TARGET_LONG_CALLS;
7871
7872 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7873 if (lookup_attribute ("short_call", attrs))
7874 return false;
7875
7876 /* For "f", be conservative, and only cater for cases in which the
7877 whole of the current function is placed in the same section. */
7878 if (!flag_reorder_blocks_and_partition
7879 && TREE_CODE (decl) == FUNCTION_DECL
7880 && arm_function_in_section_p (decl, current_function_section ()))
7881 return false;
7882
7883 if (lookup_attribute ("long_call", attrs))
7884 return true;
7885
7886 return TARGET_LONG_CALLS;
7887 }
7888
7889 /* Return nonzero if it is ok to make a tail-call to DECL. */
7890 static bool
7891 arm_function_ok_for_sibcall (tree decl, tree exp)
7892 {
7893 unsigned long func_type;
7894
7895 if (cfun->machine->sibcall_blocked)
7896 return false;
7897
7898 if (TARGET_FDPIC)
7899 {
7900 /* In FDPIC, never tailcall something for which we have no decl:
7901 the target function could be in a different module, requiring
7902 a different FDPIC register value. */
7903 if (decl == NULL)
7904 return false;
7905 }
7906
7907 /* Never tailcall something if we are generating code for Thumb-1. */
7908 if (TARGET_THUMB1)
7909 return false;
7910
7911 /* The PIC register is live on entry to VxWorks PLT entries, so we
7912 must make the call before restoring the PIC register. */
7913 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7914 return false;
7915
7916 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7917 may be used both as target of the call and base register for restoring
7918 the VFP registers */
7919 if (TARGET_APCS_FRAME && TARGET_ARM
7920 && TARGET_HARD_FLOAT
7921 && decl && arm_is_long_call_p (decl))
7922 return false;
7923
7924 /* If we are interworking and the function is not declared static
7925 then we can't tail-call it unless we know that it exists in this
7926 compilation unit (since it might be a Thumb routine). */
7927 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7928 && !TREE_ASM_WRITTEN (decl))
7929 return false;
7930
7931 func_type = arm_current_func_type ();
7932 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7933 if (IS_INTERRUPT (func_type))
7934 return false;
7935
7936 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7937 generated for entry functions themselves. */
7938 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7939 return false;
7940
7941 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7942 this would complicate matters for later code generation. */
7943 if (TREE_CODE (exp) == CALL_EXPR)
7944 {
7945 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7946 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7947 return false;
7948 }
7949
7950 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7951 {
7952 /* Check that the return value locations are the same. For
7953 example that we aren't returning a value from the sibling in
7954 a VFP register but then need to transfer it to a core
7955 register. */
7956 rtx a, b;
7957 tree decl_or_type = decl;
7958
7959 /* If it is an indirect function pointer, get the function type. */
7960 if (!decl)
7961 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7962
7963 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7964 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7965 cfun->decl, false);
7966 if (!rtx_equal_p (a, b))
7967 return false;
7968 }
7969
7970 /* Never tailcall if function may be called with a misaligned SP. */
7971 if (IS_STACKALIGN (func_type))
7972 return false;
7973
7974 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7975 references should become a NOP. Don't convert such calls into
7976 sibling calls. */
7977 if (TARGET_AAPCS_BASED
7978 && arm_abi == ARM_ABI_AAPCS
7979 && decl
7980 && DECL_WEAK (decl))
7981 return false;
7982
7983 /* We cannot do a tailcall for an indirect call by descriptor if all the
7984 argument registers are used because the only register left to load the
7985 address is IP and it will already contain the static chain. */
7986 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7987 {
7988 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7989 CUMULATIVE_ARGS cum;
7990 cumulative_args_t cum_v;
7991
7992 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7993 cum_v = pack_cumulative_args (&cum);
7994
7995 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7996 {
7997 tree type = TREE_VALUE (t);
7998 if (!VOID_TYPE_P (type))
7999 {
8000 function_arg_info arg (type, /*named=*/true);
8001 arm_function_arg_advance (cum_v, arg);
8002 }
8003 }
8004
8005 function_arg_info arg (integer_type_node, /*named=*/true);
8006 if (!arm_function_arg (cum_v, arg))
8007 return false;
8008 }
8009
8010 /* Everything else is ok. */
8011 return true;
8012 }
8013
8014 \f
8015 /* Addressing mode support functions. */
8016
8017 /* Return nonzero if X is a legitimate immediate operand when compiling
8018 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8019 int
8020 legitimate_pic_operand_p (rtx x)
8021 {
8022 if (SYMBOL_REF_P (x)
8023 || (GET_CODE (x) == CONST
8024 && GET_CODE (XEXP (x, 0)) == PLUS
8025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8026 return 0;
8027
8028 return 1;
8029 }
8030
8031 /* Record that the current function needs a PIC register. If PIC_REG is null,
8032 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8033 both cases cfun->machine->pic_reg is initialized if we have not already done
8034 so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
8035 the PIC register is reloaded at the current position in the instruction stream
8036 regardless of whether it was loaded before.  Otherwise, it is only loaded
8037 if not already done so (crtl->uses_pic_offset_table is null). Note that
8038 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8039 is only supported iff COMPUTE_NOW is false. */
8040
8041 static void
8042 require_pic_register (rtx pic_reg, bool compute_now)
8043 {
8044 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8045
8046 /* A lot of the logic here is made obscure by the fact that this
8047 routine gets called as part of the rtx cost estimation process.
8048 We don't want those calls to affect any assumptions about the real
8049 function; and further, we can't call entry_of_function() until we
8050 start the real expansion process. */
8051 if (!crtl->uses_pic_offset_table || compute_now)
8052 {
8053 gcc_assert (can_create_pseudo_p ()
8054 || (pic_reg != NULL_RTX
8055 && REG_P (pic_reg)
8056 && GET_MODE (pic_reg) == Pmode));
8057 if (arm_pic_register != INVALID_REGNUM
8058 && !compute_now
8059 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8060 {
8061 if (!cfun->machine->pic_reg)
8062 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8063
8064 /* Play games to avoid marking the function as needing pic
8065 if we are being called as part of the cost-estimation
8066 process. */
8067 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8068 crtl->uses_pic_offset_table = 1;
8069 }
8070 else
8071 {
8072 rtx_insn *seq, *insn;
8073
8074 if (pic_reg == NULL_RTX)
8075 pic_reg = gen_reg_rtx (Pmode);
8076 if (!cfun->machine->pic_reg)
8077 cfun->machine->pic_reg = pic_reg;
8078
8079 /* Play games to avoid marking the function as needing pic
8080 if we are being called as part of the cost-estimation
8081 process. */
8082 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8083 {
8084 crtl->uses_pic_offset_table = 1;
8085 start_sequence ();
8086
8087 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8088 && arm_pic_register > LAST_LO_REGNUM
8089 && !compute_now)
8090 emit_move_insn (cfun->machine->pic_reg,
8091 gen_rtx_REG (Pmode, arm_pic_register));
8092 else
8093 arm_load_pic_register (0UL, pic_reg);
8094
8095 seq = get_insns ();
8096 end_sequence ();
8097
8098 for (insn = seq; insn; insn = NEXT_INSN (insn))
8099 if (INSN_P (insn))
8100 INSN_LOCATION (insn) = prologue_location;
8101
8102 /* We can be called during expansion of PHI nodes, where
8103 we can't yet emit instructions directly in the final
8104 insn stream. Queue the insns on the entry edge, they will
8105 be committed after everything else is expanded. */
8106 if (currently_expanding_to_rtl)
8107 insert_insn_on_edge (seq,
8108 single_succ_edge
8109 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8110 else
8111 emit_insn (seq);
8112 }
8113 }
8114 }
8115 }
8116
8117 /* Generate insns to calculate the address of ORIG in pic mode. */
8118 static rtx_insn *
8119 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8120 {
8121 rtx pat;
8122 rtx mem;
8123
8124 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8125
8126 /* Make the MEM as close to a constant as possible. */
8127 mem = SET_SRC (pat);
8128 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8129 MEM_READONLY_P (mem) = 1;
8130 MEM_NOTRAP_P (mem) = 1;
8131
8132 return emit_insn (pat);
8133 }
8134
8135 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8136 created to hold the result of the load. If not NULL, PIC_REG indicates
8137 which register to use as PIC register, otherwise it is decided by register
8138 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8139 location in the instruction stream, regardless of whether it was loaded
8140 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8141 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8142
8143 Returns the register REG into which the PIC load is performed. */
8144
8145 rtx
8146 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8147 bool compute_now)
8148 {
8149 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8150
8151 if (SYMBOL_REF_P (orig)
8152 || LABEL_REF_P (orig))
8153 {
8154 if (reg == 0)
8155 {
8156 gcc_assert (can_create_pseudo_p ());
8157 reg = gen_reg_rtx (Pmode);
8158 }
8159
8160 /* VxWorks does not impose a fixed gap between segments; the run-time
8161 gap can be different from the object-file gap. We therefore can't
8162 use GOTOFF unless we are absolutely sure that the symbol is in the
8163 same segment as the GOT. Unfortunately, the flexibility of linker
8164 scripts means that we can't be sure of that in general, so assume
8165 that GOTOFF is never valid on VxWorks. */
8166 /* References to weak symbols cannot be resolved locally: they
8167 may be overridden by a non-weak definition at link time. */
8168 rtx_insn *insn;
8169 if ((LABEL_REF_P (orig)
8170 || (SYMBOL_REF_P (orig)
8171 && SYMBOL_REF_LOCAL_P (orig)
8172 && (SYMBOL_REF_DECL (orig)
8173 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8174 && (!SYMBOL_REF_FUNCTION_P (orig)
8175 || arm_fdpic_local_funcdesc_p (orig))))
8176 && NEED_GOT_RELOC
8177 && arm_pic_data_is_text_relative)
8178 insn = arm_pic_static_addr (orig, reg);
8179 else
8180 {
8181 /* If this function doesn't have a pic register, create one now. */
8182 require_pic_register (pic_reg, compute_now);
8183
8184 if (pic_reg == NULL_RTX)
8185 pic_reg = cfun->machine->pic_reg;
8186
8187 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8188 }
8189
8190 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8191 by loop. */
8192 set_unique_reg_note (insn, REG_EQUAL, orig);
8193
8194 return reg;
8195 }
8196 else if (GET_CODE (orig) == CONST)
8197 {
8198 rtx base, offset;
8199
8200 if (GET_CODE (XEXP (orig, 0)) == PLUS
8201 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8202 return orig;
8203
8204 /* Handle the case where we have: const (UNSPEC_TLS). */
8205 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8206 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8207 return orig;
8208
8209 /* Handle the case where we have:
8210 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8211 CONST_INT. */
8212 if (GET_CODE (XEXP (orig, 0)) == PLUS
8213 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8214 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8215 {
8216 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8217 return orig;
8218 }
8219
8220 if (reg == 0)
8221 {
8222 gcc_assert (can_create_pseudo_p ());
8223 reg = gen_reg_rtx (Pmode);
8224 }
8225
8226 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8227
8228 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8229 pic_reg, compute_now);
8230 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8231 base == reg ? 0 : reg, pic_reg,
8232 compute_now);
8233
8234 if (CONST_INT_P (offset))
8235 {
8236 /* The base register doesn't really matter, we only want to
8237 test the index for the appropriate mode. */
8238 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8239 {
8240 gcc_assert (can_create_pseudo_p ());
8241 offset = force_reg (Pmode, offset);
8242 }
8243
8244 if (CONST_INT_P (offset))
8245 return plus_constant (Pmode, base, INTVAL (offset));
8246 }
8247
8248 if (GET_MODE_SIZE (mode) > 4
8249 && (GET_MODE_CLASS (mode) == MODE_INT
8250 || TARGET_SOFT_FLOAT))
8251 {
8252 emit_insn (gen_addsi3 (reg, base, offset));
8253 return reg;
8254 }
8255
8256 return gen_rtx_PLUS (Pmode, base, offset);
8257 }
8258
8259 return orig;
8260 }
8261
8262
8263 /* Generate insns that produce the address of the stack canary.  */
8264 rtx
8265 arm_stack_protect_tls_canary_mem (bool reload)
8266 {
8267 rtx tp = gen_reg_rtx (SImode);
8268 if (reload)
8269 emit_insn (gen_reload_tp_hard (tp));
8270 else
8271 emit_insn (gen_load_tp_hard (tp));
8272
8273 rtx reg = gen_reg_rtx (SImode);
8274 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8275 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8276 return gen_rtx_MEM (SImode, reg);
8277 }
8278
8279
8280 /* Return whether a register is callee saved.  This is necessary because, when
8281 optimizing for size on Thumb-1 targets, high registers are marked as caller
8282 saved even though they are callee saved, in order to avoid using them.  */
8283 #define callee_saved_reg_p(reg) \
8284 (!call_used_or_fixed_reg_p (reg) \
8285 || (TARGET_THUMB1 && optimize_size \
8286 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8287
8288 /* Return a mask for the call-clobbered low registers that are unused
8289 at the end of the prologue. */
8290 static unsigned long
8291 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8292 {
8293 unsigned long mask = 0;
8294 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8295
8296 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8297 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8298 mask |= 1 << (reg - FIRST_LO_REGNUM);
8299 return mask;
8300 }
8301
8302 /* Similarly for the start of the epilogue. */
8303 static unsigned long
8304 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8305 {
8306 unsigned long mask = 0;
8307 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8308
8309 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8310 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8311 mask |= 1 << (reg - FIRST_LO_REGNUM);
8312 return mask;
8313 }
8314
8315 /* Find a spare register to use during the prolog of a function. */
8316
8317 static int
8318 thumb_find_work_register (unsigned long pushed_regs_mask)
8319 {
8320 int reg;
8321
8322 unsigned long unused_regs
8323 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8324
8325 /* Check the argument registers first as these are call-used. The
8326 register allocation order means that sometimes r3 might be used
8327 but earlier argument registers might not, so check them all. */
8328 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8329 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8330 return reg;
8331
8332 /* Otherwise look for a call-saved register that is going to be pushed. */
8333 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8334 if (pushed_regs_mask & (1 << reg))
8335 return reg;
8336
8337 if (TARGET_THUMB2)
8338 {
8339 /* Thumb-2 can use high regs. */
8340 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8341 if (pushed_regs_mask & (1 << reg))
8342 return reg;
8343 }
8344 /* Something went wrong - thumb_compute_save_reg_mask()
8345 should have arranged for a suitable register to be pushed. */
8346 gcc_unreachable ();
8347 }
8348
8349 static GTY(()) int pic_labelno;
8350
8351 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8352 low register. */
8353
8354 void
8355 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8356 {
8357 rtx l1, labelno, pic_tmp, pic_rtx;
8358
8359 if (crtl->uses_pic_offset_table == 0
8360 || TARGET_SINGLE_PIC_BASE
8361 || TARGET_FDPIC)
8362 return;
8363
8364 gcc_assert (flag_pic);
8365
8366 if (pic_reg == NULL_RTX)
8367 pic_reg = cfun->machine->pic_reg;
8368 if (TARGET_VXWORKS_RTP)
8369 {
8370 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8371 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8372 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8373
8374 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8375
8376 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8377 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8378 }
8379 else
8380 {
8381 /* We use an UNSPEC rather than a LABEL_REF because this label
8382 never appears in the code stream. */
8383
8384 labelno = GEN_INT (pic_labelno++);
8385 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8386 l1 = gen_rtx_CONST (VOIDmode, l1);
8387
8388 /* On the ARM the PC register contains 'dot + 8' at the time of the
8389 addition, on the Thumb it is 'dot + 4'. */
8390 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8391 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8392 UNSPEC_GOTSYM_OFF);
8393 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8394
8395 if (TARGET_32BIT)
8396 {
8397 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8398 }
8399 else /* TARGET_THUMB1 */
8400 {
8401 if (arm_pic_register != INVALID_REGNUM
8402 && REGNO (pic_reg) > LAST_LO_REGNUM)
8403 {
8404 /* We will have pushed the pic register, so we should always be
8405 able to find a work register. */
8406 pic_tmp = gen_rtx_REG (SImode,
8407 thumb_find_work_register (saved_regs));
8408 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8409 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8410 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8411 }
8412 else if (arm_pic_register != INVALID_REGNUM
8413 && arm_pic_register > LAST_LO_REGNUM
8414 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8415 {
8416 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8417 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8418 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8419 }
8420 else
8421 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8422 }
8423 }
8424
8425 /* Need to emit this whether or not we obey regdecls,
8426 since setjmp/longjmp can cause life info to screw up. */
8427 emit_use (pic_reg);
8428 }
8429
8430 /* Try to determine whether an object, referenced via ORIG, will be
8431 placed in the text or data segment. This is used in FDPIC mode, to
8432 decide which relocations to use when accessing ORIG. *IS_READONLY
8433 is set to true if ORIG is a read-only location, false otherwise.
8434 Return true if we could determine the location of ORIG, false
8435 otherwise. *IS_READONLY is valid only when we return true. */
8436 static bool
8437 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8438 {
8439 *is_readonly = false;
8440
8441 if (LABEL_REF_P (orig))
8442 {
8443 *is_readonly = true;
8444 return true;
8445 }
8446
8447 if (SYMBOL_REF_P (orig))
8448 {
8449 if (CONSTANT_POOL_ADDRESS_P (orig))
8450 {
8451 *is_readonly = true;
8452 return true;
8453 }
8454 if (SYMBOL_REF_LOCAL_P (orig)
8455 && !SYMBOL_REF_EXTERNAL_P (orig)
8456 && SYMBOL_REF_DECL (orig)
8457 && (!DECL_P (SYMBOL_REF_DECL (orig))
8458 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8459 {
8460 tree decl = SYMBOL_REF_DECL (orig);
8461 tree init = VAR_P (decl)
8462 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8463 ? decl : 0;
8464 int reloc = 0;
8465 bool named_section, readonly;
8466
8467 if (init && init != error_mark_node)
8468 reloc = compute_reloc_for_constant (init);
8469
8470 named_section = VAR_P (decl)
8471 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8472 readonly = decl_readonly_section (decl, reloc);
8473
8474 /* We don't know where the link script will put a named
8475 section, so return false in such a case. */
8476 if (named_section)
8477 return false;
8478
8479 *is_readonly = readonly;
8480 return true;
8481 }
8482
8483 /* We don't know. */
8484 return false;
8485 }
8486
8487 gcc_unreachable ();
8488 }
8489
8490 /* Generate code to load the address of a static var when flag_pic is set. */
8491 static rtx_insn *
8492 arm_pic_static_addr (rtx orig, rtx reg)
8493 {
8494 rtx l1, labelno, offset_rtx;
8495 rtx_insn *insn;
8496
8497 gcc_assert (flag_pic);
8498
8499 bool is_readonly = false;
8500 bool info_known = false;
8501
8502 if (TARGET_FDPIC
8503 && SYMBOL_REF_P (orig)
8504 && !SYMBOL_REF_FUNCTION_P (orig))
8505 info_known = arm_is_segment_info_known (orig, &is_readonly);
8506
8507 if (TARGET_FDPIC
8508 && SYMBOL_REF_P (orig)
8509 && !SYMBOL_REF_FUNCTION_P (orig)
8510 && !info_known)
8511 {
8512 /* We don't know where orig is stored, so we have to be
8513 pessimistic and use a GOT relocation. */
8514 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8515
8516 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8517 }
8518 else if (TARGET_FDPIC
8519 && SYMBOL_REF_P (orig)
8520 && (SYMBOL_REF_FUNCTION_P (orig)
8521 || !is_readonly))
8522 {
8523 /* We use the GOTOFF relocation. */
8524 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8525
8526 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8527 emit_insn (gen_movsi (reg, l1));
8528 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8529 }
8530 else
8531 {
8532 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8533 PC-relative access. */
8534 /* We use an UNSPEC rather than a LABEL_REF because this label
8535 never appears in the code stream. */
8536 labelno = GEN_INT (pic_labelno++);
8537 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8538 l1 = gen_rtx_CONST (VOIDmode, l1);
8539
8540 /* On the ARM the PC register contains 'dot + 8' at the time of the
8541 addition; on Thumb it is 'dot + 4'. */
8542 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8543 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8544 UNSPEC_SYMBOL_OFFSET);
8545 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8546
8547 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8548 labelno));
8549 }
8550
8551 return insn;
8552 }
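/* For reference, a hedged sketch of what the non-FDPIC branch above amounts
   to at the assembly level (illustrative only, not the exact RTL or output
   template, which may also use movw/movt on newer cores):

       ldr     rN, .LCoff          @ rN = sym - (.LPICn + 8)
   .LPICn:
       add     rN, pc              @ rN = &sym
       ...
   .LCoff:
       .word   sym - (.LPICn + 8)

   with the constant 8 replaced by 4 in Thumb state, matching the
   'dot + 8' / 'dot + 4' adjustment noted in the code.  */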
8553
8554 /* Return nonzero if X is valid as an ARM state addressing register. */
8555 static int
8556 arm_address_register_rtx_p (rtx x, int strict_p)
8557 {
8558 int regno;
8559
8560 if (!REG_P (x))
8561 return 0;
8562
8563 regno = REGNO (x);
8564
8565 if (strict_p)
8566 return ARM_REGNO_OK_FOR_BASE_P (regno);
8567
8568 return (regno <= LAST_ARM_REGNUM
8569 || regno >= FIRST_PSEUDO_REGISTER
8570 || regno == FRAME_POINTER_REGNUM
8571 || regno == ARG_POINTER_REGNUM);
8572 }
8573
8574 /* Return TRUE if this rtx is the difference of a symbol and a label,
8575 and will reduce to a PC-relative relocation in the object file.
8576 Expressions like this can be left alone when generating PIC, rather
8577 than forced through the GOT. */
8578 static int
8579 pcrel_constant_p (rtx x)
8580 {
8581 if (GET_CODE (x) == MINUS)
8582 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8583
8584 return FALSE;
8585 }
8586
8587 /* Return true if X will surely end up in an index register after next
8588 splitting pass. */
8589 static bool
8590 will_be_in_index_register (const_rtx x)
8591 {
8592 /* arm.md: calculate_pic_address will split this into a register. */
8593 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8594 }
8595
8596 /* Return nonzero if X is a valid ARM state address operand. */
8597 int
8598 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8599 int strict_p)
8600 {
8601 bool use_ldrd;
8602 enum rtx_code code = GET_CODE (x);
8603
8604 if (arm_address_register_rtx_p (x, strict_p))
8605 return 1;
8606
8607 use_ldrd = (TARGET_LDRD
8608 && (mode == DImode || mode == DFmode));
8609
8610 if (code == POST_INC || code == PRE_DEC
8611 || ((code == PRE_INC || code == POST_DEC)
8612 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8613 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8614
8615 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8616 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8617 && GET_CODE (XEXP (x, 1)) == PLUS
8618 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8619 {
8620 rtx addend = XEXP (XEXP (x, 1), 1);
8621
8622 /* Don't allow ldrd post increment by register because it's hard
8623 to fix up invalid register choices. */
8624 if (use_ldrd
8625 && GET_CODE (x) == POST_MODIFY
8626 && REG_P (addend))
8627 return 0;
8628
8629 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8630 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8631 }
8632
8633 /* After reload constants split into minipools will have addresses
8634 from a LABEL_REF. */
8635 else if (reload_completed
8636 && (code == LABEL_REF
8637 || (code == CONST
8638 && GET_CODE (XEXP (x, 0)) == PLUS
8639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8640 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8641 return 1;
8642
8643 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8644 return 0;
8645
8646 else if (code == PLUS)
8647 {
8648 rtx xop0 = XEXP (x, 0);
8649 rtx xop1 = XEXP (x, 1);
8650
8651 return ((arm_address_register_rtx_p (xop0, strict_p)
8652 && ((CONST_INT_P (xop1)
8653 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8654 || (!strict_p && will_be_in_index_register (xop1))))
8655 || (arm_address_register_rtx_p (xop1, strict_p)
8656 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8657 }
8658
8659 #if 0
8660 /* Reload currently can't handle MINUS, so disable this for now */
8661 else if (GET_CODE (x) == MINUS)
8662 {
8663 rtx xop0 = XEXP (x, 0);
8664 rtx xop1 = XEXP (x, 1);
8665
8666 return (arm_address_register_rtx_p (xop0, strict_p)
8667 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8668 }
8669 #endif
8670
8671 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8672 && code == SYMBOL_REF
8673 && CONSTANT_POOL_ADDRESS_P (x)
8674 && ! (flag_pic
8675 && symbol_mentioned_p (get_pool_constant (x))
8676 && ! pcrel_constant_p (get_pool_constant (x))))
8677 return 1;
8678
8679 return 0;
8680 }
8681
8682 /* Return true if we can avoid creating a constant pool entry for x. */
8683 static bool
8684 can_avoid_literal_pool_for_label_p (rtx x)
8685 {
8686 /* Normally we can assign constant values to target registers without
8687 the help of the constant pool. But there are cases where we have to use
8688 the constant pool, for example:
8689 1) assigning a label to a register;
8690 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8691
8692 A constant pool access of the form:
8693 (set (reg r0) (mem (symbol_ref (".LC0"))))
8694 will cause the use of a literal pool (later, in function arm_reorg).
8695 So here we mark such a form as invalid, and the compiler will then
8696 adjust it into:
8697 (set (reg r0) (symbol_ref (".LC0")))
8698 (set (reg r0) (mem (reg r0))).
8699 No extra register is required, and (mem (reg r0)) won't cause the use
8700 of literal pools. */
8701 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8702 && CONSTANT_POOL_ADDRESS_P (x))
8703 return 1;
8704 return 0;
8705 }
8706
8707
8708 /* Return nonzero if X is a valid Thumb-2 address operand. */
8709 static int
8710 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8711 {
8712 bool use_ldrd;
8713 enum rtx_code code = GET_CODE (x);
8714
8715 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8716 we can store and load it like any other 16-bit value. */
8717 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8718 mode = HImode;
8719
8720 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8721 return mve_vector_mem_operand (mode, x, strict_p);
8722
8723 if (arm_address_register_rtx_p (x, strict_p))
8724 return 1;
8725
8726 use_ldrd = (TARGET_LDRD
8727 && (mode == DImode || mode == DFmode));
8728
8729 if (code == POST_INC || code == PRE_DEC
8730 || ((code == PRE_INC || code == POST_DEC)
8731 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8732 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8733
8734 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8735 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8736 && GET_CODE (XEXP (x, 1)) == PLUS
8737 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8738 {
8739 /* Thumb-2 only has autoincrement by constant. */
8740 rtx addend = XEXP (XEXP (x, 1), 1);
8741 HOST_WIDE_INT offset;
8742
8743 if (!CONST_INT_P (addend))
8744 return 0;
8745
8746 offset = INTVAL (addend);
8747 if (GET_MODE_SIZE (mode) <= 4)
8748 return (offset > -256 && offset < 256);
8749
8750 return (use_ldrd && offset > -1024 && offset < 1024
8751 && (offset & 3) == 0);
8752 }
8753
8754 /* After reload constants split into minipools will have addresses
8755 from a LABEL_REF. */
8756 else if (reload_completed
8757 && (code == LABEL_REF
8758 || (code == CONST
8759 && GET_CODE (XEXP (x, 0)) == PLUS
8760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8761 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8762 return 1;
8763
8764 else if (mode == TImode
8765 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8766 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8767 return 0;
8768
8769 else if (code == PLUS)
8770 {
8771 rtx xop0 = XEXP (x, 0);
8772 rtx xop1 = XEXP (x, 1);
8773
8774 return ((arm_address_register_rtx_p (xop0, strict_p)
8775 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8776 || (!strict_p && will_be_in_index_register (xop1))))
8777 || (arm_address_register_rtx_p (xop1, strict_p)
8778 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8779 }
8780
8781 else if (can_avoid_literal_pool_for_label_p (x))
8782 return 0;
8783
8784 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8785 && code == SYMBOL_REF
8786 && CONSTANT_POOL_ADDRESS_P (x)
8787 && ! (flag_pic
8788 && symbol_mentioned_p (get_pool_constant (x))
8789 && ! pcrel_constant_p (get_pool_constant (x))))
8790 return 1;
8791
8792 return 0;
8793 }
8794
8795 /* Return nonzero if INDEX is valid for an address index operand in
8796 ARM state. */
8797 static int
8798 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8799 int strict_p)
8800 {
8801 HOST_WIDE_INT range;
8802 enum rtx_code code = GET_CODE (index);
8803
8804 /* Standard coprocessor addressing modes. */
8805 if (TARGET_HARD_FLOAT
8806 && (mode == SFmode || mode == DFmode))
8807 return (code == CONST_INT && INTVAL (index) < 1024
8808 && INTVAL (index) > -1024
8809 && (INTVAL (index) & 3) == 0);
8810
8811 /* For quad modes, we restrict the constant offset to be slightly less
8812 than what the instruction format permits. We do this because for
8813 quad mode moves, we will actually decompose them into two separate
8814 double-mode reads or writes. INDEX must therefore be a valid
8815 (double-mode) offset and so should INDEX+8. */
8816 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8817 return (code == CONST_INT
8818 && INTVAL (index) < 1016
8819 && INTVAL (index) > -1024
8820 && (INTVAL (index) & 3) == 0);
8821
8822 /* We have no such constraint on double mode offsets, so we permit the
8823 full range of the instruction format. */
8824 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8825 return (code == CONST_INT
8826 && INTVAL (index) < 1024
8827 && INTVAL (index) > -1024
8828 && (INTVAL (index) & 3) == 0);
8829
8830 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8831 return (code == CONST_INT
8832 && INTVAL (index) < 1024
8833 && INTVAL (index) > -1024
8834 && (INTVAL (index) & 3) == 0);
8835
8836 if (arm_address_register_rtx_p (index, strict_p)
8837 && (GET_MODE_SIZE (mode) <= 4))
8838 return 1;
8839
8840 if (mode == DImode || mode == DFmode)
8841 {
8842 if (code == CONST_INT)
8843 {
8844 HOST_WIDE_INT val = INTVAL (index);
8845
8846 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8847 If vldr is selected it uses arm_coproc_mem_operand. */
8848 if (TARGET_LDRD)
8849 return val > -256 && val < 256;
8850 else
8851 return val > -4096 && val < 4092;
8852 }
8853
8854 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8855 }
8856
8857 if (GET_MODE_SIZE (mode) <= 4
8858 && ! (arm_arch4
8859 && (mode == HImode
8860 || mode == HFmode
8861 || (mode == QImode && outer == SIGN_EXTEND))))
8862 {
8863 if (code == MULT)
8864 {
8865 rtx xiop0 = XEXP (index, 0);
8866 rtx xiop1 = XEXP (index, 1);
8867
8868 return ((arm_address_register_rtx_p (xiop0, strict_p)
8869 && power_of_two_operand (xiop1, SImode))
8870 || (arm_address_register_rtx_p (xiop1, strict_p)
8871 && power_of_two_operand (xiop0, SImode)));
8872 }
8873 else if (code == LSHIFTRT || code == ASHIFTRT
8874 || code == ASHIFT || code == ROTATERT)
8875 {
8876 rtx op = XEXP (index, 1);
8877
8878 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8879 && CONST_INT_P (op)
8880 && INTVAL (op) > 0
8881 && INTVAL (op) <= 31);
8882 }
8883 }
8884
8885 /* For ARM v4 we may be doing a sign-extend operation during the
8886 load. */
8887 if (arm_arch4)
8888 {
8889 if (mode == HImode
8890 || mode == HFmode
8891 || (outer == SIGN_EXTEND && mode == QImode))
8892 range = 256;
8893 else
8894 range = 4096;
8895 }
8896 else
8897 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8898
8899 return (code == CONST_INT
8900 && INTVAL (index) < range
8901 && INTVAL (index) > -range);
8902 }
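/* In summary (a hedged restatement derived from the checks above, not quoted
   from the architecture manual): an ARM-state index is either a core
   register, a register scaled by a power of two (MULT) or shifted by 1..31,
   or an immediate offset of up to +/-4095, narrowed to +/-255 for the ARMv4
   halfword/signed-byte forms and to the ranges handled explicitly above for
   DImode/DFmode and the coprocessor modes.  */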
8903
8904 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8905 index operand. i.e. 1, 2, 4 or 8. */
8906 static bool
8907 thumb2_index_mul_operand (rtx op)
8908 {
8909 HOST_WIDE_INT val;
8910
8911 if (!CONST_INT_P (op))
8912 return false;
8913
8914 val = INTVAL (op);
8915 return (val == 1 || val == 2 || val == 4 || val == 8);
8916 }
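/* As far as this check is concerned, the accepted factors 1, 2, 4 and 8
   correspond to the LSL #0..#3 shifts available in the Thumb-2
   register-offset addressing mode [Rn, Rm, LSL #n].  */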
8917
8918 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8919 static int
8920 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8921 {
8922 enum rtx_code code = GET_CODE (index);
8923
8924 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8925 /* Standard coprocessor addressing modes. */
8926 if (TARGET_VFP_BASE
8927 && (mode == SFmode || mode == DFmode))
8928 return (code == CONST_INT && INTVAL (index) < 1024
8929 /* Thumb-2 only allows an index range greater than -256 for its core
8930 register load/stores. Since we allow SF/DF in core registers, we have
8931 to use the intersection between -256~4096 (core) and -1024~1024
8932 (coprocessor). */
8933 && INTVAL (index) > -256
8934 && (INTVAL (index) & 3) == 0);
8935
8936 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8937 {
8938 /* For DImode assume values will usually live in core regs
8939 and only allow LDRD addressing modes. */
8940 if (!TARGET_LDRD || mode != DImode)
8941 return (code == CONST_INT
8942 && INTVAL (index) < 1024
8943 && INTVAL (index) > -1024
8944 && (INTVAL (index) & 3) == 0);
8945 }
8946
8947 /* For quad modes, we restrict the constant offset to be slightly less
8948 than what the instruction format permits. We do this because for
8949 quad mode moves, we will actually decompose them into two separate
8950 double-mode reads or writes. INDEX must therefore be a valid
8951 (double-mode) offset and so should INDEX+8. */
8952 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8953 return (code == CONST_INT
8954 && INTVAL (index) < 1016
8955 && INTVAL (index) > -1024
8956 && (INTVAL (index) & 3) == 0);
8957
8958 /* We have no such constraint on double mode offsets, so we permit the
8959 full range of the instruction format. */
8960 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8961 return (code == CONST_INT
8962 && INTVAL (index) < 1024
8963 && INTVAL (index) > -1024
8964 && (INTVAL (index) & 3) == 0);
8965
8966 if (arm_address_register_rtx_p (index, strict_p)
8967 && (GET_MODE_SIZE (mode) <= 4))
8968 return 1;
8969
8970 if (mode == DImode || mode == DFmode)
8971 {
8972 if (code == CONST_INT)
8973 {
8974 HOST_WIDE_INT val = INTVAL (index);
8975 /* Thumb-2 ldrd only has reg+const addressing modes.
8976 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8977 If vldr is selected it uses arm_coproc_mem_operand. */
8978 if (TARGET_LDRD)
8979 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8980 else
8981 return IN_RANGE (val, -255, 4095 - 4);
8982 }
8983 else
8984 return 0;
8985 }
8986
8987 if (code == MULT)
8988 {
8989 rtx xiop0 = XEXP (index, 0);
8990 rtx xiop1 = XEXP (index, 1);
8991
8992 return ((arm_address_register_rtx_p (xiop0, strict_p)
8993 && thumb2_index_mul_operand (xiop1))
8994 || (arm_address_register_rtx_p (xiop1, strict_p)
8995 && thumb2_index_mul_operand (xiop0)));
8996 }
8997 else if (code == ASHIFT)
8998 {
8999 rtx op = XEXP (index, 1);
9000
9001 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9002 && CONST_INT_P (op)
9003 && INTVAL (op) > 0
9004 && INTVAL (op) <= 3);
9005 }
9006
9007 return (code == CONST_INT
9008 && INTVAL (index) < 4096
9009 && INTVAL (index) > -256);
9010 }
9011
9012 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9013 static int
9014 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9015 {
9016 int regno;
9017
9018 if (!REG_P (x))
9019 return 0;
9020
9021 regno = REGNO (x);
9022
9023 if (strict_p)
9024 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9025
9026 return (regno <= LAST_LO_REGNUM
9027 || regno > LAST_VIRTUAL_REGISTER
9028 || regno == FRAME_POINTER_REGNUM
9029 || (GET_MODE_SIZE (mode) >= 4
9030 && (regno == STACK_POINTER_REGNUM
9031 || regno >= FIRST_PSEUDO_REGISTER
9032 || x == hard_frame_pointer_rtx
9033 || x == arg_pointer_rtx)));
9034 }
9035
9036 /* Return nonzero if x is a legitimate index register. This is the case
9037 for any base register that can access a QImode object. */
9038 inline static int
9039 thumb1_index_register_rtx_p (rtx x, int strict_p)
9040 {
9041 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9042 }
9043
9044 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9045
9046 The AP may be eliminated to either the SP or the FP, so we use the
9047 least common denominator, e.g. SImode, and offsets from 0 to 64.
9048
9049 ??? Verify whether the above is the right approach.
9050
9051 ??? Also, the FP may be eliminated to the SP, so perhaps that
9052 needs special handling also.
9053
9054 ??? Look at how the mips16 port solves this problem. It probably uses
9055 better ways to solve some of these problems.
9056
9057 Although it is not incorrect, we don't accept QImode and HImode
9058 addresses based on the frame pointer or arg pointer until the
9059 reload pass starts. This is so that eliminating such addresses
9060 into stack based ones won't produce impossible code. */
9061 int
9062 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9063 {
9064 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9065 return 0;
9066
9067 /* ??? Not clear if this is right. Experiment. */
9068 if (GET_MODE_SIZE (mode) < 4
9069 && !(reload_in_progress || reload_completed)
9070 && (reg_mentioned_p (frame_pointer_rtx, x)
9071 || reg_mentioned_p (arg_pointer_rtx, x)
9072 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9073 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9074 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9075 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9076 return 0;
9077
9078 /* Accept any base register. SP only in SImode or larger. */
9079 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9080 return 1;
9081
9082 /* This is PC relative data before arm_reorg runs. */
9083 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9084 && SYMBOL_REF_P (x)
9085 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9086 && !arm_disable_literal_pool)
9087 return 1;
9088
9089 /* This is PC relative data after arm_reorg runs. */
9090 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9091 && reload_completed
9092 && (LABEL_REF_P (x)
9093 || (GET_CODE (x) == CONST
9094 && GET_CODE (XEXP (x, 0)) == PLUS
9095 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9096 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9097 return 1;
9098
9099 /* Post-inc indexing only supported for SImode and larger. */
9100 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9101 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9102 return 1;
9103
9104 else if (GET_CODE (x) == PLUS)
9105 {
9106 /* REG+REG address can be any two index registers. */
9107 /* We disallow FRAME+REG addressing since we know that FRAME
9108 will be replaced with STACK, and SP relative addressing only
9109 permits SP+OFFSET. */
9110 if (GET_MODE_SIZE (mode) <= 4
9111 && XEXP (x, 0) != frame_pointer_rtx
9112 && XEXP (x, 1) != frame_pointer_rtx
9113 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9114 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9115 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9116 return 1;
9117
9118 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9119 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9120 || XEXP (x, 0) == arg_pointer_rtx)
9121 && CONST_INT_P (XEXP (x, 1))
9122 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9123 return 1;
9124
9125 /* REG+const has a 10-bit offset for SP, but only SImode and
9126 larger are supported. */
9127 /* ??? Should probably check for DI/DFmode overflow here
9128 just like GO_IF_LEGITIMATE_OFFSET does. */
9129 else if (REG_P (XEXP (x, 0))
9130 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9131 && GET_MODE_SIZE (mode) >= 4
9132 && CONST_INT_P (XEXP (x, 1))
9133 && INTVAL (XEXP (x, 1)) >= 0
9134 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9135 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9136 return 1;
9137
9138 else if (REG_P (XEXP (x, 0))
9139 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9140 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9141 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9142 && GET_MODE_SIZE (mode) >= 4
9143 && CONST_INT_P (XEXP (x, 1))
9144 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9145 return 1;
9146 }
9147
9148 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9149 && GET_MODE_SIZE (mode) == 4
9150 && SYMBOL_REF_P (x)
9151 && CONSTANT_POOL_ADDRESS_P (x)
9152 && !arm_disable_literal_pool
9153 && ! (flag_pic
9154 && symbol_mentioned_p (get_pool_constant (x))
9155 && ! pcrel_constant_p (get_pool_constant (x))))
9156 return 1;
9157
9158 return 0;
9159 }
9160
9161 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9162 instruction of mode MODE. */
9163 int
9164 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9165 {
9166 switch (GET_MODE_SIZE (mode))
9167 {
9168 case 1:
9169 return val >= 0 && val < 32;
9170
9171 case 2:
9172 return val >= 0 && val < 64 && (val & 1) == 0;
9173
9174 default:
9175 return (val >= 0
9176 && (val + GET_MODE_SIZE (mode)) <= 128
9177 && (val & 3) == 0);
9178 }
9179 }
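/* A quick worked example of the ranges accepted above (derived from the
   code itself): QImode offsets are 0..31, HImode offsets are the even
   values 0..62, and SImode (and larger) offsets are the multiples of four
   from 0 up to 128 - GET_MODE_SIZE (mode), e.g. 0..124 for SImode.  These
   match the scaled 5-bit immediates of the Thumb-1 load/store encodings.  */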
9180
9181 bool
9182 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9183 {
9184 if (TARGET_ARM)
9185 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9186 else if (TARGET_THUMB2)
9187 return thumb2_legitimate_address_p (mode, x, strict_p);
9188 else /* if (TARGET_THUMB1) */
9189 return thumb1_legitimate_address_p (mode, x, strict_p);
9190 }
9191
9192 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9193
9194 Given an rtx X being reloaded into a reg required to be
9195 in class CLASS, return the class of reg to actually use.
9196 In general this is just CLASS, but for the Thumb core registers and
9197 immediate constants we prefer a LO_REGS class or a subset. */
9198
9199 static reg_class_t
9200 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9201 {
9202 if (TARGET_32BIT)
9203 return rclass;
9204 else
9205 {
9206 if (rclass == GENERAL_REGS)
9207 return LO_REGS;
9208 else
9209 return rclass;
9210 }
9211 }
9212
9213 /* Build the SYMBOL_REF for __tls_get_addr. */
9214
9215 static GTY(()) rtx tls_get_addr_libfunc;
9216
9217 static rtx
9218 get_tls_get_addr (void)
9219 {
9220 if (!tls_get_addr_libfunc)
9221 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9222 return tls_get_addr_libfunc;
9223 }
9224
9225 rtx
9226 arm_load_tp (rtx target)
9227 {
9228 if (!target)
9229 target = gen_reg_rtx (SImode);
9230
9231 if (TARGET_HARD_TP)
9232 {
9233 /* Can return in any reg. */
9234 emit_insn (gen_load_tp_hard (target));
9235 }
9236 else
9237 {
9238 /* Always returned in r0. Immediately copy the result into a pseudo,
9239 otherwise other uses of r0 (e.g. setting up function arguments) may
9240 clobber the value. */
9241
9242 rtx tmp;
9243
9244 if (TARGET_FDPIC)
9245 {
9246 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9247 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9248
9249 emit_insn (gen_load_tp_soft_fdpic ());
9250
9251 /* Restore r9. */
9252 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9253 }
9254 else
9255 emit_insn (gen_load_tp_soft ());
9256
9257 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9258 emit_move_insn (target, tmp);
9259 }
9260 return target;
9261 }
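/* For reference (a hedged note, not a definitive statement of the patterns'
   output): on hardware-TP targets gen_load_tp_hard typically expands to a
   CP15 read of TPIDRURO such as

       mrc     p15, 0, rX, c13, c0, 3

   while the soft variants call the __aeabi_read_tp helper, which returns the
   thread pointer in r0 -- hence the copy from r0 above.  */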
9262
9263 static rtx
9264 load_tls_operand (rtx x, rtx reg)
9265 {
9266 rtx tmp;
9267
9268 if (reg == NULL_RTX)
9269 reg = gen_reg_rtx (SImode);
9270
9271 tmp = gen_rtx_CONST (SImode, x);
9272
9273 emit_move_insn (reg, tmp);
9274
9275 return reg;
9276 }
9277
9278 static rtx_insn *
9279 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9280 {
9281 rtx label, labelno = NULL_RTX, sum;
9282
9283 gcc_assert (reloc != TLS_DESCSEQ);
9284 start_sequence ();
9285
9286 if (TARGET_FDPIC)
9287 {
9288 sum = gen_rtx_UNSPEC (Pmode,
9289 gen_rtvec (2, x, GEN_INT (reloc)),
9290 UNSPEC_TLS);
9291 }
9292 else
9293 {
9294 labelno = GEN_INT (pic_labelno++);
9295 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9296 label = gen_rtx_CONST (VOIDmode, label);
9297
9298 sum = gen_rtx_UNSPEC (Pmode,
9299 gen_rtvec (4, x, GEN_INT (reloc), label,
9300 GEN_INT (TARGET_ARM ? 8 : 4)),
9301 UNSPEC_TLS);
9302 }
9303 reg = load_tls_operand (sum, reg);
9304
9305 if (TARGET_FDPIC)
9306 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9307 else if (TARGET_ARM)
9308 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9309 else
9310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9311
9312 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9313 LCT_PURE, /* LCT_CONST? */
9314 Pmode, reg, Pmode);
9315
9316 rtx_insn *insns = get_insns ();
9317 end_sequence ();
9318
9319 return insns;
9320 }
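/* Roughly speaking (a hedged summary of the sequence built above): the
   non-FDPIC path loads a pc-relative TLS offset for X, typically from the
   literal pool, adds the PC at a local label (dot + 8 in ARM state,
   dot + 4 in Thumb), and then calls __tls_get_addr with that address as the
   single argument, leaving the call's value in *VALUEP.  */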
9321
9322 static rtx
9323 arm_tls_descseq_addr (rtx x, rtx reg)
9324 {
9325 rtx labelno = GEN_INT (pic_labelno++);
9326 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9327 rtx sum = gen_rtx_UNSPEC (Pmode,
9328 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9329 gen_rtx_CONST (VOIDmode, label),
9330 GEN_INT (!TARGET_ARM)),
9331 UNSPEC_TLS);
9332 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9333
9334 emit_insn (gen_tlscall (x, labelno));
9335 if (!reg)
9336 reg = gen_reg_rtx (SImode);
9337 else
9338 gcc_assert (REGNO (reg) != R0_REGNUM);
9339
9340 emit_move_insn (reg, reg0);
9341
9342 return reg;
9343 }
9344
9345
9346 rtx
9347 legitimize_tls_address (rtx x, rtx reg)
9348 {
9349 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9350 rtx_insn *insns;
9351 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9352
9353 switch (model)
9354 {
9355 case TLS_MODEL_GLOBAL_DYNAMIC:
9356 if (TARGET_GNU2_TLS)
9357 {
9358 gcc_assert (!TARGET_FDPIC);
9359
9360 reg = arm_tls_descseq_addr (x, reg);
9361
9362 tp = arm_load_tp (NULL_RTX);
9363
9364 dest = gen_rtx_PLUS (Pmode, tp, reg);
9365 }
9366 else
9367 {
9368 /* Original scheme */
9369 if (TARGET_FDPIC)
9370 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9371 else
9372 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9373 dest = gen_reg_rtx (Pmode);
9374 emit_libcall_block (insns, dest, ret, x);
9375 }
9376 return dest;
9377
9378 case TLS_MODEL_LOCAL_DYNAMIC:
9379 if (TARGET_GNU2_TLS)
9380 {
9381 gcc_assert (!TARGET_FDPIC);
9382
9383 reg = arm_tls_descseq_addr (x, reg);
9384
9385 tp = arm_load_tp (NULL_RTX);
9386
9387 dest = gen_rtx_PLUS (Pmode, tp, reg);
9388 }
9389 else
9390 {
9391 if (TARGET_FDPIC)
9392 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9393 else
9394 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9395
9396 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9397 share the LDM result with other LD model accesses. */
9398 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9399 UNSPEC_TLS);
9400 dest = gen_reg_rtx (Pmode);
9401 emit_libcall_block (insns, dest, ret, eqv);
9402
9403 /* Load the addend. */
9404 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9405 GEN_INT (TLS_LDO32)),
9406 UNSPEC_TLS);
9407 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9408 dest = gen_rtx_PLUS (Pmode, dest, addend);
9409 }
9410 return dest;
9411
9412 case TLS_MODEL_INITIAL_EXEC:
9413 if (TARGET_FDPIC)
9414 {
9415 sum = gen_rtx_UNSPEC (Pmode,
9416 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9417 UNSPEC_TLS);
9418 reg = load_tls_operand (sum, reg);
9419 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9420 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9421 }
9422 else
9423 {
9424 labelno = GEN_INT (pic_labelno++);
9425 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9426 label = gen_rtx_CONST (VOIDmode, label);
9427 sum = gen_rtx_UNSPEC (Pmode,
9428 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9429 GEN_INT (TARGET_ARM ? 8 : 4)),
9430 UNSPEC_TLS);
9431 reg = load_tls_operand (sum, reg);
9432
9433 if (TARGET_ARM)
9434 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9435 else if (TARGET_THUMB2)
9436 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9437 else
9438 {
9439 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9440 emit_move_insn (reg, gen_const_mem (SImode, reg));
9441 }
9442 }
9443
9444 tp = arm_load_tp (NULL_RTX);
9445
9446 return gen_rtx_PLUS (Pmode, tp, reg);
9447
9448 case TLS_MODEL_LOCAL_EXEC:
9449 tp = arm_load_tp (NULL_RTX);
9450
9451 reg = gen_rtx_UNSPEC (Pmode,
9452 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9453 UNSPEC_TLS);
9454 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9455
9456 return gen_rtx_PLUS (Pmode, tp, reg);
9457
9458 default:
9459 abort ();
9460 }
9461 }
9462
9463 /* Try machine-dependent ways of modifying an illegitimate address
9464 to be legitimate. If we find one, return the new, valid address. */
9465 rtx
9466 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9467 {
9468 if (arm_tls_referenced_p (x))
9469 {
9470 rtx addend = NULL;
9471
9472 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9473 {
9474 addend = XEXP (XEXP (x, 0), 1);
9475 x = XEXP (XEXP (x, 0), 0);
9476 }
9477
9478 if (!SYMBOL_REF_P (x))
9479 return x;
9480
9481 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9482
9483 x = legitimize_tls_address (x, NULL_RTX);
9484
9485 if (addend)
9486 {
9487 x = gen_rtx_PLUS (SImode, x, addend);
9488 orig_x = x;
9489 }
9490 else
9491 return x;
9492 }
9493
9494 if (TARGET_THUMB1)
9495 return thumb_legitimize_address (x, orig_x, mode);
9496
9497 if (GET_CODE (x) == PLUS)
9498 {
9499 rtx xop0 = XEXP (x, 0);
9500 rtx xop1 = XEXP (x, 1);
9501
9502 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9503 xop0 = force_reg (SImode, xop0);
9504
9505 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9506 && !symbol_mentioned_p (xop1))
9507 xop1 = force_reg (SImode, xop1);
9508
9509 if (ARM_BASE_REGISTER_RTX_P (xop0)
9510 && CONST_INT_P (xop1))
9511 {
9512 HOST_WIDE_INT n, low_n;
9513 rtx base_reg, val;
9514 n = INTVAL (xop1);
9515
9516 /* VFP addressing modes actually allow greater offsets, but for
9517 now we just stick with the lowest common denominator. */
9518 if (mode == DImode || mode == DFmode)
9519 {
9520 low_n = n & 0x0f;
9521 n &= ~0x0f;
9522 if (low_n > 4)
9523 {
9524 n += 16;
9525 low_n -= 16;
9526 }
9527 }
9528 else
9529 {
9530 low_n = ((mode) == TImode ? 0
9531 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9532 n -= low_n;
9533 }
9534
9535 base_reg = gen_reg_rtx (SImode);
9536 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9537 emit_move_insn (base_reg, val);
9538 x = plus_constant (Pmode, base_reg, low_n);
9539 }
9540 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9541 x = gen_rtx_PLUS (SImode, xop0, xop1);
9542 }
9543
9544 /* XXX We don't allow MINUS any more -- see comment in
9545 arm_legitimate_address_outer_p (). */
9546 else if (GET_CODE (x) == MINUS)
9547 {
9548 rtx xop0 = XEXP (x, 0);
9549 rtx xop1 = XEXP (x, 1);
9550
9551 if (CONSTANT_P (xop0))
9552 xop0 = force_reg (SImode, xop0);
9553
9554 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9555 xop1 = force_reg (SImode, xop1);
9556
9557 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9558 x = gen_rtx_MINUS (SImode, xop0, xop1);
9559 }
9560
9561 /* Make sure to take full advantage of the pre-indexed addressing mode
9562 with absolute addresses, which often allows the base register to
9563 be factored out across multiple adjacent memory references, and might
9564 even allow the minipool to be avoided entirely. */
9565 else if (CONST_INT_P (x) && optimize > 0)
9566 {
9567 unsigned int bits;
9568 HOST_WIDE_INT mask, base, index;
9569 rtx base_reg;
9570
9571 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9572 only use an 8-bit index. So let's use a 12-bit index for
9573 SImode only and hope that arm_gen_constant will enable LDRB
9574 to use more bits. */
9575 bits = (mode == SImode) ? 12 : 8;
9576 mask = (1 << bits) - 1;
9577 base = INTVAL (x) & ~mask;
9578 index = INTVAL (x) & mask;
9579 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9580 {
9581 /* It'll most probably be more efficient to generate the
9582 base with more bits set and use a negative index instead.
9583 Don't do this for Thumb as negative offsets are much more
9584 limited. */
9585 base |= mask;
9586 index -= mask;
9587 }
9588 base_reg = force_reg (SImode, GEN_INT (base));
9589 x = plus_constant (Pmode, base_reg, index);
9590 }
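/* Worked example for the constant-address split above (illustrative, with a
   made-up address): loading an SImode value from absolute address 0xFFFFF008
   gives base 0xFFFFF000 and index 8; since that base has many bits set, it
   is flipped to 0xFFFFFFFF (a single mvn #0) with index 8 - 0xFFF = -4087,
   which still fits the +/-4095 ldr offset range.  */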
9591
9592 if (flag_pic)
9593 {
9594 /* We need to find and carefully transform any SYMBOL and LABEL
9595 references; so go back to the original address expression. */
9596 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9597 false /*compute_now*/);
9598
9599 if (new_x != orig_x)
9600 x = new_x;
9601 }
9602
9603 return x;
9604 }
9605
9606
9607 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9608 to be legitimate. If we find one, return the new, valid address. */
9609 rtx
9610 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9611 {
9612 if (GET_CODE (x) == PLUS
9613 && CONST_INT_P (XEXP (x, 1))
9614 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9615 || INTVAL (XEXP (x, 1)) < 0))
9616 {
9617 rtx xop0 = XEXP (x, 0);
9618 rtx xop1 = XEXP (x, 1);
9619 HOST_WIDE_INT offset = INTVAL (xop1);
9620
9621 /* Try to fold the offset into a biasing of the base register and
9622 then offsetting that. Don't do this when optimizing for space
9623 since it can cause too many CSEs. */
9624 if (optimize_size && offset >= 0
9625 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9626 {
9627 HOST_WIDE_INT delta;
9628
9629 if (offset >= 256)
9630 delta = offset - (256 - GET_MODE_SIZE (mode));
9631 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9632 delta = 31 * GET_MODE_SIZE (mode);
9633 else
9634 delta = offset & (~31 * GET_MODE_SIZE (mode));
9635
9636 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9637 NULL_RTX);
9638 x = plus_constant (Pmode, xop0, delta);
9639 }
9640 else if (offset < 0 && offset > -256)
9641 /* Small negative offsets are best done with a subtract before the
9642 dereference, since forcing these into a register normally takes two
9643 instructions. */
9644 x = force_operand (x, NULL_RTX);
9645 else
9646 {
9647 /* For the remaining cases, force the constant into a register. */
9648 xop1 = force_reg (SImode, xop1);
9649 x = gen_rtx_PLUS (SImode, xop0, xop1);
9650 }
9651 }
9652 else if (GET_CODE (x) == PLUS
9653 && s_register_operand (XEXP (x, 1), SImode)
9654 && !s_register_operand (XEXP (x, 0), SImode))
9655 {
9656 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9657
9658 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9659 }
9660
9661 if (flag_pic)
9662 {
9663 /* We need to find and carefully transform any SYMBOL and LABEL
9664 references; so go back to the original address expression. */
9665 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9666 false /*compute_now*/);
9667
9668 if (new_x != orig_x)
9669 x = new_x;
9670 }
9671
9672 return x;
9673 }
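/* Worked example for the offset-folding branch above (hedged; the sizes and
   offsets are chosen purely for illustration): an SImode access at
   base + 260 is out of the direct 0..124 range, so under that path the code
   rebiases the base by 260 - delta = 252 and keeps delta = 8 as the
   remaining in-range offset, i.e. (base + 252) + 8.  */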
9674
9675 /* Return TRUE if X contains any TLS symbol references. */
9676
9677 bool
9678 arm_tls_referenced_p (rtx x)
9679 {
9680 if (! TARGET_HAVE_TLS)
9681 return false;
9682
9683 subrtx_iterator::array_type array;
9684 FOR_EACH_SUBRTX (iter, array, x, ALL)
9685 {
9686 const_rtx x = *iter;
9687 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9688 {
9689 /* ARM currently does not provide relocations to encode TLS variables
9690 into AArch32 instructions, only data, so there is currently no way
9691 to implement these if a literal pool is disabled. */
9692 if (arm_disable_literal_pool)
9693 sorry ("accessing thread-local storage is not currently supported "
9694 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9695
9696 return true;
9697 }
9698
9699 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9700 TLS offsets, not real symbol references. */
9701 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9702 iter.skip_subrtxes ();
9703 }
9704 return false;
9705 }
9706
9707 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9708
9709 On the ARM, allow any integer (invalid ones are removed later by insn
9710 patterns), nice doubles and symbol_refs which refer to the function's
9711 constant pool XXX.
9712
9713 When generating PIC, allow anything. */
9714
9715 static bool
9716 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9717 {
9718 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9719 return false;
9720
9721 return flag_pic || !label_mentioned_p (x);
9722 }
9723
9724 static bool
9725 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9726 {
9727 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9728 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9729 for ARMv8-M Baseline or later the result is valid. */
9730 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9731 x = XEXP (x, 0);
9732
9733 return (CONST_INT_P (x)
9734 || CONST_DOUBLE_P (x)
9735 || CONSTANT_ADDRESS_P (x)
9736 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9737 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9738 we build the symbol address with upper/lower
9739 relocations. */
9740 || (TARGET_THUMB1
9741 && !label_mentioned_p (x)
9742 && arm_valid_symbolic_address_p (x)
9743 && arm_disable_literal_pool)
9744 || flag_pic);
9745 }
9746
9747 static bool
9748 arm_legitimate_constant_p (machine_mode mode, rtx x)
9749 {
9750 return (!arm_cannot_force_const_mem (mode, x)
9751 && (TARGET_32BIT
9752 ? arm_legitimate_constant_p_1 (mode, x)
9753 : thumb_legitimate_constant_p (mode, x)));
9754 }
9755
9756 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9757
9758 static bool
9759 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9760 {
9761 rtx base, offset;
9762 split_const (x, &base, &offset);
9763
9764 if (SYMBOL_REF_P (base))
9765 {
9766 /* Function symbols cannot have an offset due to the Thumb bit. */
9767 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9768 && INTVAL (offset) != 0)
9769 return true;
9770
9771 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9772 && !offset_within_block_p (base, INTVAL (offset)))
9773 return true;
9774 }
9775 return arm_tls_referenced_p (x);
9776 }
9777 \f
9778 #define REG_OR_SUBREG_REG(X) \
9779 (REG_P (X) \
9780 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9781
9782 #define REG_OR_SUBREG_RTX(X) \
9783 (REG_P (X) ? (X) : SUBREG_REG (X))
9784
9785 static inline int
9786 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9787 {
9788 machine_mode mode = GET_MODE (x);
9789 int total, words;
9790
9791 switch (code)
9792 {
9793 case ASHIFT:
9794 case ASHIFTRT:
9795 case LSHIFTRT:
9796 case ROTATERT:
9797 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9798
9799 case PLUS:
9800 case MINUS:
9801 case COMPARE:
9802 case NEG:
9803 case NOT:
9804 return COSTS_N_INSNS (1);
9805
9806 case MULT:
9807 if (arm_arch6m && arm_m_profile_small_mul)
9808 return COSTS_N_INSNS (32);
9809
9810 if (CONST_INT_P (XEXP (x, 1)))
9811 {
9812 int cycles = 0;
9813 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9814
9815 while (i)
9816 {
9817 i >>= 2;
9818 cycles++;
9819 }
9820 return COSTS_N_INSNS (2) + cycles;
9821 }
9822 return COSTS_N_INSNS (1) + 16;
9823
9824 case SET:
9825 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9826 the mode. */
9827 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9828 return (COSTS_N_INSNS (words)
9829 + 4 * ((MEM_P (SET_SRC (x)))
9830 + MEM_P (SET_DEST (x))));
9831
9832 case CONST_INT:
9833 if (outer == SET)
9834 {
9835 if (UINTVAL (x) < 256
9836 /* 16-bit constant. */
9837 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9838 return 0;
9839 if (thumb_shiftable_const (INTVAL (x)))
9840 return COSTS_N_INSNS (2);
9841 return arm_disable_literal_pool
9842 ? COSTS_N_INSNS (8)
9843 : COSTS_N_INSNS (3);
9844 }
9845 else if ((outer == PLUS || outer == COMPARE)
9846 && INTVAL (x) < 256 && INTVAL (x) > -256)
9847 return 0;
9848 else if ((outer == IOR || outer == XOR || outer == AND)
9849 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9850 return COSTS_N_INSNS (1);
9851 else if (outer == AND)
9852 {
9853 int i;
9854 /* This duplicates the tests in the andsi3 expander. */
9855 for (i = 9; i <= 31; i++)
9856 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9857 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9858 return COSTS_N_INSNS (2);
9859 }
9860 else if (outer == ASHIFT || outer == ASHIFTRT
9861 || outer == LSHIFTRT)
9862 return 0;
9863 return COSTS_N_INSNS (2);
9864
9865 case CONST:
9866 case CONST_DOUBLE:
9867 case LABEL_REF:
9868 case SYMBOL_REF:
9869 return COSTS_N_INSNS (3);
9870
9871 case UDIV:
9872 case UMOD:
9873 case DIV:
9874 case MOD:
9875 return 100;
9876
9877 case TRUNCATE:
9878 return 99;
9879
9880 case AND:
9881 case XOR:
9882 case IOR:
9883 /* XXX guess. */
9884 return 8;
9885
9886 case MEM:
9887 /* XXX another guess. */
9888 /* Memory costs quite a lot for the first word, but subsequent words
9889 load at the equivalent of a single insn each. */
9890 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9891 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9892 ? 4 : 0));
9893
9894 case IF_THEN_ELSE:
9895 /* XXX a guess. */
9896 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9897 return 14;
9898 return 2;
9899
9900 case SIGN_EXTEND:
9901 case ZERO_EXTEND:
9902 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9903 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9904
9905 if (mode == SImode)
9906 return total;
9907
9908 if (arm_arch6)
9909 return total + COSTS_N_INSNS (1);
9910
9911 /* Assume a two-shift sequence. Increase the cost slightly so
9912 we prefer actual shifts over an extend operation. */
9913 return total + 1 + COSTS_N_INSNS (2);
9914
9915 default:
9916 return 99;
9917 }
9918 }
9919
9920 /* Estimates the size cost of thumb1 instructions.
9921 For now most of the code is copied from thumb1_rtx_costs. We need more
9922 fine-grained tuning when we have more related test cases. */
9923 static inline int
9924 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9925 {
9926 machine_mode mode = GET_MODE (x);
9927 int words, cost;
9928
9929 switch (code)
9930 {
9931 case ASHIFT:
9932 case ASHIFTRT:
9933 case LSHIFTRT:
9934 case ROTATERT:
9935 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9936
9937 case PLUS:
9938 case MINUS:
9939 /* Thumb-1 needs two instructions to implement shiftadd/shiftsub0/shiftsub1
9940 patterns defined by RTL expansion, especially for the expansion of
9941 multiplication. */
9942 if ((GET_CODE (XEXP (x, 0)) == MULT
9943 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9944 || (GET_CODE (XEXP (x, 1)) == MULT
9945 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9946 return COSTS_N_INSNS (2);
9947 /* Fall through. */
9948 case COMPARE:
9949 case NEG:
9950 case NOT:
9951 return COSTS_N_INSNS (1);
9952
9953 case MULT:
9954 if (CONST_INT_P (XEXP (x, 1)))
9955 {
9956 /* The Thumb-1 mul instruction can't operate on a constant. We must load
9957 it into a register first. */
9958 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9959 /* For targets which have a very small and high-latency multiply
9960 unit, we prefer to synthesize the mult with up to 5 instructions,
9961 giving a good balance between size and performance. */
9962 if (arm_arch6m && arm_m_profile_small_mul)
9963 return COSTS_N_INSNS (5);
9964 else
9965 return COSTS_N_INSNS (1) + const_size;
9966 }
9967 return COSTS_N_INSNS (1);
9968
9969 case SET:
9970 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9971 the mode. */
9972 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9973 cost = COSTS_N_INSNS (words);
9974 if (satisfies_constraint_J (SET_SRC (x))
9975 || satisfies_constraint_K (SET_SRC (x))
9976 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9977 || (CONST_INT_P (SET_SRC (x))
9978 && UINTVAL (SET_SRC (x)) >= 256
9979 && TARGET_HAVE_MOVT
9980 && satisfies_constraint_j (SET_SRC (x)))
9981 /* thumb1_movdi_insn. */
9982 || ((words > 1) && MEM_P (SET_SRC (x))))
9983 cost += COSTS_N_INSNS (1);
9984 return cost;
9985
9986 case CONST_INT:
9987 if (outer == SET)
9988 {
9989 if (UINTVAL (x) < 256)
9990 return COSTS_N_INSNS (1);
9991 /* movw is 4 bytes long. */
9992 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9993 return COSTS_N_INSNS (2);
9994 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9995 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9996 return COSTS_N_INSNS (2);
9997 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9998 if (thumb_shiftable_const (INTVAL (x)))
9999 return COSTS_N_INSNS (2);
10000 return arm_disable_literal_pool
10001 ? COSTS_N_INSNS (8)
10002 : COSTS_N_INSNS (3);
10003 }
10004 else if ((outer == PLUS || outer == COMPARE)
10005 && INTVAL (x) < 256 && INTVAL (x) > -256)
10006 return 0;
10007 else if ((outer == IOR || outer == XOR || outer == AND)
10008 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10009 return COSTS_N_INSNS (1);
10010 else if (outer == AND)
10011 {
10012 int i;
10013 /* This duplicates the tests in the andsi3 expander. */
10014 for (i = 9; i <= 31; i++)
10015 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10016 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10017 return COSTS_N_INSNS (2);
10018 }
10019 else if (outer == ASHIFT || outer == ASHIFTRT
10020 || outer == LSHIFTRT)
10021 return 0;
10022 return COSTS_N_INSNS (2);
10023
10024 case CONST:
10025 case CONST_DOUBLE:
10026 case LABEL_REF:
10027 case SYMBOL_REF:
10028 return COSTS_N_INSNS (3);
10029
10030 case UDIV:
10031 case UMOD:
10032 case DIV:
10033 case MOD:
10034 return 100;
10035
10036 case TRUNCATE:
10037 return 99;
10038
10039 case AND:
10040 case XOR:
10041 case IOR:
10042 return COSTS_N_INSNS (1);
10043
10044 case MEM:
10045 return (COSTS_N_INSNS (1)
10046 + COSTS_N_INSNS (1)
10047 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10048 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10049 ? COSTS_N_INSNS (1) : 0));
10050
10051 case IF_THEN_ELSE:
10052 /* XXX a guess. */
10053 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10054 return 14;
10055 return 2;
10056
10057 case ZERO_EXTEND:
10058 /* XXX still guessing. */
10059 switch (GET_MODE (XEXP (x, 0)))
10060 {
10061 case E_QImode:
10062 return (1 + (mode == DImode ? 4 : 0)
10063 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10064
10065 case E_HImode:
10066 return (4 + (mode == DImode ? 4 : 0)
10067 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10068
10069 case E_SImode:
10070 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10071
10072 default:
10073 return 99;
10074 }
10075
10076 default:
10077 return 99;
10078 }
10079 }
10080
10081 /* Helper function for arm_rtx_costs. If one operand of OP, a
10082 PLUS, adds the carry flag, then return the other operand. If
10083 neither is a carry, return OP unchanged. */
10084 static rtx
10085 strip_carry_operation (rtx op)
10086 {
10087 gcc_assert (GET_CODE (op) == PLUS);
10088 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10089 return XEXP (op, 1);
10090 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10091 return XEXP (op, 0);
10092 return op;
10093 }
10094
10095 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10096 operand, then return the operand that is being shifted. If the shift
10097 is not by a constant, then set SHIFT_REG to point to the operand.
10098 Return NULL if OP is not a shifter operand. */
10099 static rtx
10100 shifter_op_p (rtx op, rtx *shift_reg)
10101 {
10102 enum rtx_code code = GET_CODE (op);
10103
10104 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10105 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10106 return XEXP (op, 0);
10107 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10108 return XEXP (op, 0);
10109 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10110 || code == ASHIFTRT)
10111 {
10112 if (!CONST_INT_P (XEXP (op, 1)))
10113 *shift_reg = XEXP (op, 1);
10114 return XEXP (op, 0);
10115 }
10116
10117 return NULL;
10118 }
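/* For example, (mult (reg Rm) (const_int 4)) is accepted here and treated as
   Rm shifted left by two, since combine and friends sometimes represent such
   a shift as a MULT by a power of two.  */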
10119
10120 static bool
10121 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10122 {
10123 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10124 rtx_code code = GET_CODE (x);
10125 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10126
10127 switch (XINT (x, 1))
10128 {
10129 case UNSPEC_UNALIGNED_LOAD:
10130 /* We can only do unaligned loads into the integer unit, and we can't
10131 use LDM or LDRD. */
10132 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10133 if (speed_p)
10134 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10135 + extra_cost->ldst.load_unaligned);
10136
10137 #ifdef NOT_YET
10138 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10139 ADDR_SPACE_GENERIC, speed_p);
10140 #endif
10141 return true;
10142
10143 case UNSPEC_UNALIGNED_STORE:
10144 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10145 if (speed_p)
10146 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10147 + extra_cost->ldst.store_unaligned);
10148
10149 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10150 #ifdef NOT_YET
10151 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10152 ADDR_SPACE_GENERIC, speed_p);
10153 #endif
10154 return true;
10155
10156 case UNSPEC_VRINTZ:
10157 case UNSPEC_VRINTP:
10158 case UNSPEC_VRINTM:
10159 case UNSPEC_VRINTR:
10160 case UNSPEC_VRINTX:
10161 case UNSPEC_VRINTA:
10162 if (speed_p)
10163 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10164
10165 return true;
10166 default:
10167 *cost = COSTS_N_INSNS (2);
10168 break;
10169 }
10170 return true;
10171 }
10172
10173 /* Cost of a libcall. We assume one insn per argument, an amount for the
10174 call (one insn for -Os) and then one for processing the result. */
10175 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
10176
10177 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10178 do \
10179 { \
10180 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10181 if (shift_op != NULL \
10182 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10183 { \
10184 if (shift_reg) \
10185 { \
10186 if (speed_p) \
10187 *cost += extra_cost->alu.arith_shift_reg; \
10188 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10189 ASHIFT, 1, speed_p); \
10190 } \
10191 else if (speed_p) \
10192 *cost += extra_cost->alu.arith_shift; \
10193 \
10194 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10195 ASHIFT, 0, speed_p) \
10196 + rtx_cost (XEXP (x, 1 - IDX), \
10197 GET_MODE (shift_op), \
10198 OP, 1, speed_p)); \
10199 return true; \
10200 } \
10201 } \
10202 while (0)
10203
10204 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10205 considering the costs of the addressing mode and memory access
10206 separately. */
10207 static bool
10208 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10209 int *cost, bool speed_p)
10210 {
10211 machine_mode mode = GET_MODE (x);
10212
10213 *cost = COSTS_N_INSNS (1);
10214
10215 if (flag_pic
10216 && GET_CODE (XEXP (x, 0)) == PLUS
10217 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10218 /* This will be split into two instructions. Add the cost of the
10219 additional instruction here. The cost of the memory access is computed
10220 below. See arm.md:calculate_pic_address. */
10221 *cost += COSTS_N_INSNS (1);
10222
10223 /* Calculate cost of the addressing mode. */
10224 if (speed_p)
10225 {
10226 arm_addr_mode_op op_type;
10227 switch (GET_CODE (XEXP (x, 0)))
10228 {
10229 default:
10230 case REG:
10231 op_type = AMO_DEFAULT;
10232 break;
10233 case MINUS:
10234 /* MINUS does not appear in RTL, but the architecture supports it,
10235 so handle this case defensively. */
10236 /* fall through */
10237 case PLUS:
10238 op_type = AMO_NO_WB;
10239 break;
10240 case PRE_INC:
10241 case PRE_DEC:
10242 case POST_INC:
10243 case POST_DEC:
10244 case PRE_MODIFY:
10245 case POST_MODIFY:
10246 op_type = AMO_WB;
10247 break;
10248 }
10249
10250 if (VECTOR_MODE_P (mode))
10251 *cost += current_tune->addr_mode_costs->vector[op_type];
10252 else if (FLOAT_MODE_P (mode))
10253 *cost += current_tune->addr_mode_costs->fp[op_type];
10254 else
10255 *cost += current_tune->addr_mode_costs->integer[op_type];
10256 }
10257
10258 /* Calculate cost of memory access. */
10259 if (speed_p)
10260 {
10261 if (FLOAT_MODE_P (mode))
10262 {
10263 if (GET_MODE_SIZE (mode) == 8)
10264 *cost += extra_cost->ldst.loadd;
10265 else
10266 *cost += extra_cost->ldst.loadf;
10267 }
10268 else if (VECTOR_MODE_P (mode))
10269 *cost += extra_cost->ldst.loadv;
10270 else
10271 {
10272 /* Integer modes */
10273 if (GET_MODE_SIZE (mode) == 8)
10274 *cost += extra_cost->ldst.ldrd;
10275 else
10276 *cost += extra_cost->ldst.load;
10277 }
10278 }
10279
10280 return true;
10281 }
10282
10283 /* Helper for arm_bfi_p. */
10284 static bool
10285 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10286 {
10287 unsigned HOST_WIDE_INT const1;
10288 unsigned HOST_WIDE_INT const2 = 0;
10289
10290 if (!CONST_INT_P (XEXP (op0, 1)))
10291 return false;
10292
10293 const1 = UINTVAL (XEXP (op0, 1));
10294 if (!CONST_INT_P (XEXP (op1, 1))
10295 || ~UINTVAL (XEXP (op1, 1)) != const1)
10296 return false;
10297
10298 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10299 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10300 {
10301 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10302 *sub0 = XEXP (XEXP (op0, 0), 0);
10303 }
10304 else
10305 *sub0 = XEXP (op0, 0);
10306
10307 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10308 return false;
10309
10310 *sub1 = XEXP (op1, 0);
10311 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10312 }
10313
10314 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10315 format looks something like:
10316
10317 (IOR (AND (reg1) (~const1))
10318 (AND (ASHIFT (reg2) (const2))
10319 (const1)))
10320
10321 where const1 is a consecutive sequence of 1-bits with the
10322 least-significant non-zero bit starting at bit position const2. If
10323 const2 is zero, then the shift will not appear at all, due to
10324 canonicalization. The two arms of the IOR expression may be
10325 flipped. */
10326 static bool
10327 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10328 {
10329 if (GET_CODE (x) != IOR)
10330 return false;
10331 if (GET_CODE (XEXP (x, 0)) != AND
10332 || GET_CODE (XEXP (x, 1)) != AND)
10333 return false;
10334 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10335 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10336 }
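/* A concrete instance (illustrative only): with const1 == 0xff00 and
   const2 == 8, the expression

     (ior (and (reg r1) (const_int 0xffff00ff))   ;; the SImode ~0xff00
          (and (ashift (reg r2) (const_int 8)) (const_int 0xff00)))

   inserts the low byte of r2 into bits 8..15 of r1, which is the kind of
   operation a single BFI can perform.  */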
10337
10338 /* RTX costs. Make an estimate of the cost of executing the operation
10339 X, which is contained within an operation with code OUTER_CODE.
10340 SPEED_P indicates whether the cost desired is the performance cost,
10341 or the size cost. The estimate is stored in COST and the return
10342 value is TRUE if the cost calculation is final, or FALSE if the
10343 caller should recurse through the operands of X to add additional
10344 costs.
10345
10346 We currently make no attempt to model the size savings of Thumb-2
10347 16-bit instructions. At the normal points in compilation where
10348 this code is called we have no measure of whether the condition
10349 flags are live or not, and thus no realistic way to determine what
10350 the size will eventually be. */
10351 static bool
10352 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10353 const struct cpu_cost_table *extra_cost,
10354 int *cost, bool speed_p)
10355 {
10356 machine_mode mode = GET_MODE (x);
10357
10358 *cost = COSTS_N_INSNS (1);
10359
10360 if (TARGET_THUMB1)
10361 {
10362 if (speed_p)
10363 *cost = thumb1_rtx_costs (x, code, outer_code);
10364 else
10365 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10366 return true;
10367 }
10368
10369 switch (code)
10370 {
10371 case SET:
10372 *cost = 0;
10373 /* SET RTXs don't have a mode so we get it from the destination. */
10374 mode = GET_MODE (SET_DEST (x));
10375
10376 if (REG_P (SET_SRC (x))
10377 && REG_P (SET_DEST (x)))
10378 {
10379 /* Assume that most copies can be done with a single insn,
10380 unless we don't have HW FP, in which case everything
10381 larger than word mode will require two insns. */
10382 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10383 && GET_MODE_SIZE (mode) > 4)
10384 || mode == DImode)
10385 ? 2 : 1);
10386 /* Conditional register moves can be encoded
10387 in 16 bits in Thumb mode. */
10388 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10389 *cost >>= 1;
10390
10391 return true;
10392 }
10393
10394 if (CONST_INT_P (SET_SRC (x)))
10395 {
10396 /* Handle CONST_INT here, since the value doesn't have a mode
10397 and we would otherwise be unable to work out the true cost. */
10398 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10399 0, speed_p);
10400 outer_code = SET;
10401 /* Slightly lower the cost of setting a core reg to a constant.
10402 This helps break up chains and allows for better scheduling. */
10403 if (REG_P (SET_DEST (x))
10404 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10405 *cost -= 1;
10406 x = SET_SRC (x);
10407 /* Immediate moves with an immediate in the range [0, 255] can be
10408 encoded in 16 bits in Thumb mode. */
10409 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10410 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10411 *cost >>= 1;
10412 goto const_int_cost;
10413 }
10414
10415 return false;
10416
10417 case MEM:
10418 return arm_mem_costs (x, extra_cost, cost, speed_p);
10419
10420 case PARALLEL:
10421 {
10422 /* Calculations of LDM costs are complex. We assume an initial cost
10423 (ldm_1st) which will load the number of registers mentioned in
10424 ldm_regs_per_insn_1st registers; then each additional
10425 ldm_regs_per_insn_subsequent registers cost one more insn. The
10426 formula for N regs is thus:
10427
10428 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10429 + ldm_regs_per_insn_subsequent - 1)
10430 / ldm_regs_per_insn_subsequent).
10431
10432 Additional costs may also be added for addressing. A similar
10433 formula is used for STM. */
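        /* A worked example with made-up tuning values (not taken from any
           real cost table): with ldm_regs_per_insn_1st == 3 and
           ldm_regs_per_insn_subsequent == 2, an 8-register LDM costs
           ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
           = ldm_1st + COSTS_N_INSNS (3).  */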
10434
10435 bool is_ldm = load_multiple_operation (x, SImode);
10436 bool is_stm = store_multiple_operation (x, SImode);
10437
10438 if (is_ldm || is_stm)
10439 {
10440 if (speed_p)
10441 {
10442 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10443 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10444 ? extra_cost->ldst.ldm_regs_per_insn_1st
10445 : extra_cost->ldst.stm_regs_per_insn_1st;
10446 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10447 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10448 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10449
10450 *cost += regs_per_insn_1st
10451 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10452 + regs_per_insn_sub - 1)
10453 / regs_per_insn_sub);
10454 return true;
10455 }
10456
10457 }
10458 return false;
10459 }
10460 case DIV:
10461 case UDIV:
10462 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10463 && (mode == SFmode || !TARGET_VFP_SINGLE))
10464 *cost += COSTS_N_INSNS (speed_p
10465 ? extra_cost->fp[mode != SFmode].div : 0);
10466 else if (mode == SImode && TARGET_IDIV)
10467 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10468 else
10469 *cost = LIBCALL_COST (2);
10470
10471 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10472 are possible, udiv is preferred. */
10473 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10474 return false; /* All arguments must be in registers. */
10475
10476 case MOD:
10477 /* MOD by a power of 2 can be expanded as:
10478 rsbs r1, r0, #0
10479 and r0, r0, #(n - 1)
10480 and r1, r1, #(n - 1)
10481 rsbpl r0, r1, #0. */
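      /* The sequence above yields a remainder with the sign of the dividend;
         the costing below charges the three additional insns plus, when
         optimizing for speed, two logical operations and one arithmetic
         operation.  */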
10482 if (CONST_INT_P (XEXP (x, 1))
10483 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10484 && mode == SImode)
10485 {
10486 *cost += COSTS_N_INSNS (3);
10487
10488 if (speed_p)
10489 *cost += 2 * extra_cost->alu.logical
10490 + extra_cost->alu.arith;
10491 return true;
10492 }
10493
10494 /* Fall-through. */
10495 case UMOD:
10496 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10497 are possible, udiv is preferred. */
10498 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10499 return false; /* All arguments must be in registers. */
10500
10501 case ROTATE:
10502 if (mode == SImode && REG_P (XEXP (x, 1)))
10503 {
10504 *cost += (COSTS_N_INSNS (1)
10505 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10506 if (speed_p)
10507 *cost += extra_cost->alu.shift_reg;
10508 return true;
10509 }
10510 /* Fall through */
10511 case ROTATERT:
10512 case ASHIFT:
10513 case LSHIFTRT:
10514 case ASHIFTRT:
10515 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10516 {
10517 *cost += (COSTS_N_INSNS (2)
10518 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10519 if (speed_p)
10520 *cost += 2 * extra_cost->alu.shift;
10521 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10522 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10523 *cost += 1;
10524 return true;
10525 }
10526 else if (mode == SImode)
10527 {
10528 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10529 /* Slightly disparage register shifts at -Os, but not by much. */
10530 if (!CONST_INT_P (XEXP (x, 1)))
10531 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10532 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10533 return true;
10534 }
10535 else if (GET_MODE_CLASS (mode) == MODE_INT
10536 && GET_MODE_SIZE (mode) < 4)
10537 {
10538 if (code == ASHIFT)
10539 {
10540 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10541 /* Slightly disparage register shifts at -Os, but not by
10542 much. */
10543 if (!CONST_INT_P (XEXP (x, 1)))
10544 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10545 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10546 }
10547 else if (code == LSHIFTRT || code == ASHIFTRT)
10548 {
10549 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10550 {
10551 /* Can use SBFX/UBFX. */
10552 if (speed_p)
10553 *cost += extra_cost->alu.bfx;
10554 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10555 }
10556 else
10557 {
10558 *cost += COSTS_N_INSNS (1);
10559 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10560 if (speed_p)
10561 {
10562 if (CONST_INT_P (XEXP (x, 1)))
10563 *cost += 2 * extra_cost->alu.shift;
10564 else
10565 *cost += (extra_cost->alu.shift
10566 + extra_cost->alu.shift_reg);
10567 }
10568 else
10569 /* Slightly disparage register shifts. */
10570 *cost += !CONST_INT_P (XEXP (x, 1));
10571 }
10572 }
10573 else /* Rotates. */
10574 {
10575 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10576 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10577 if (speed_p)
10578 {
10579 if (CONST_INT_P (XEXP (x, 1)))
10580 *cost += (2 * extra_cost->alu.shift
10581 + extra_cost->alu.log_shift);
10582 else
10583 *cost += (extra_cost->alu.shift
10584 + extra_cost->alu.shift_reg
10585 + extra_cost->alu.log_shift_reg);
10586 }
10587 }
10588 return true;
10589 }
10590
10591 *cost = LIBCALL_COST (2);
10592 return false;
10593
10594 case BSWAP:
10595 if (arm_arch6)
10596 {
10597 if (mode == SImode)
10598 {
10599 if (speed_p)
10600 *cost += extra_cost->alu.rev;
10601
10602 return false;
10603 }
10604 }
10605 else
10606 {
10607 /* No rev instruction available. Look at arm_legacy_rev
10608 and thumb_legacy_rev for the form of RTL used then. */
10609 if (TARGET_THUMB)
10610 {
10611 *cost += COSTS_N_INSNS (9);
10612
10613 if (speed_p)
10614 {
10615 *cost += 6 * extra_cost->alu.shift;
10616 *cost += 3 * extra_cost->alu.logical;
10617 }
10618 }
10619 else
10620 {
10621 *cost += COSTS_N_INSNS (4);
10622
10623 if (speed_p)
10624 {
10625 *cost += 2 * extra_cost->alu.shift;
10626 *cost += extra_cost->alu.arith_shift;
10627 *cost += 2 * extra_cost->alu.logical;
10628 }
10629 }
10630 return true;
10631 }
10632 return false;
10633
10634 case MINUS:
10635 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10636 && (mode == SFmode || !TARGET_VFP_SINGLE))
10637 {
10638 if (GET_CODE (XEXP (x, 0)) == MULT
10639 || GET_CODE (XEXP (x, 1)) == MULT)
10640 {
10641 rtx mul_op0, mul_op1, sub_op;
10642
10643 if (speed_p)
10644 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10645
10646 if (GET_CODE (XEXP (x, 0)) == MULT)
10647 {
10648 mul_op0 = XEXP (XEXP (x, 0), 0);
10649 mul_op1 = XEXP (XEXP (x, 0), 1);
10650 sub_op = XEXP (x, 1);
10651 }
10652 else
10653 {
10654 mul_op0 = XEXP (XEXP (x, 1), 0);
10655 mul_op1 = XEXP (XEXP (x, 1), 1);
10656 sub_op = XEXP (x, 0);
10657 }
10658
10659 /* The first operand of the multiply may be optionally
10660 negated. */
10661 if (GET_CODE (mul_op0) == NEG)
10662 mul_op0 = XEXP (mul_op0, 0);
10663
10664 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10665 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10666 + rtx_cost (sub_op, mode, code, 0, speed_p));
10667
10668 return true;
10669 }
10670
10671 if (speed_p)
10672 *cost += extra_cost->fp[mode != SFmode].addsub;
10673 return false;
10674 }
10675
10676 if (mode == SImode)
10677 {
10678 rtx shift_by_reg = NULL;
10679 rtx shift_op;
10680 rtx non_shift_op;
10681 rtx op0 = XEXP (x, 0);
10682 rtx op1 = XEXP (x, 1);
10683
10684 /* Factor out any borrow operation. There's more than one way
10685 of expressing this; try to recognize them all. */
10686 if (GET_CODE (op0) == MINUS)
10687 {
10688 if (arm_borrow_operation (op1, SImode))
10689 {
10690 op1 = XEXP (op0, 1);
10691 op0 = XEXP (op0, 0);
10692 }
10693 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10694 op0 = XEXP (op0, 0);
10695 }
10696 else if (GET_CODE (op1) == PLUS
10697 && arm_borrow_operation (XEXP (op1, 0), SImode))
10698 op1 = XEXP (op1, 0);
10699 else if (GET_CODE (op0) == NEG
10700 && arm_borrow_operation (op1, SImode))
10701 {
10702 /* Negate with carry-in. For Thumb2 this is done with
10703 SBC R, X, X lsl #1 (i.e. X - 2X - C), as Thumb lacks the
10704 RSC instruction that exists in Arm mode. */
10705 if (speed_p)
10706 *cost += (TARGET_THUMB2
10707 ? extra_cost->alu.arith_shift
10708 : extra_cost->alu.arith);
10709 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10710 return true;
10711 }
10712 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10713 Note we do mean ~borrow here. */
10714 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10715 {
10716 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10717 return true;
10718 }
10719
10720 shift_op = shifter_op_p (op0, &shift_by_reg);
10721 if (shift_op == NULL)
10722 {
10723 shift_op = shifter_op_p (op1, &shift_by_reg);
10724 non_shift_op = op0;
10725 }
10726 else
10727 non_shift_op = op1;
10728
10729 if (shift_op != NULL)
10730 {
10731 if (shift_by_reg != NULL)
10732 {
10733 if (speed_p)
10734 *cost += extra_cost->alu.arith_shift_reg;
10735 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10736 }
10737 else if (speed_p)
10738 *cost += extra_cost->alu.arith_shift;
10739
10740 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10741 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10742 return true;
10743 }
10744
10745 if (arm_arch_thumb2
10746 && GET_CODE (XEXP (x, 1)) == MULT)
10747 {
10748 /* MLS. */
10749 if (speed_p)
10750 *cost += extra_cost->mult[0].add;
10751 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10752 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10753 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10754 return true;
10755 }
10756
10757 if (CONST_INT_P (op0))
10758 {
10759 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10760 INTVAL (op0), NULL_RTX,
10761 NULL_RTX, 1, 0);
10762 *cost = COSTS_N_INSNS (insns);
10763 if (speed_p)
10764 *cost += insns * extra_cost->alu.arith;
10765 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10766 return true;
10767 }
10768 else if (speed_p)
10769 *cost += extra_cost->alu.arith;
10770
10771 /* Don't recurse as we don't want to cost any borrow that
10772 we've stripped. */
10773 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10774 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10775 return true;
10776 }
10777
10778 if (GET_MODE_CLASS (mode) == MODE_INT
10779 && GET_MODE_SIZE (mode) < 4)
10780 {
10781 rtx shift_op, shift_reg;
10782 shift_reg = NULL;
10783
10784 /* We check both sides of the MINUS for shifter operands since,
10785 unlike PLUS, it's not commutative. */
10786
10787 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10788 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10789
10790 /* Slightly disparage, as we might need to widen the result. */
10791 *cost += 1;
10792 if (speed_p)
10793 *cost += extra_cost->alu.arith;
10794
10795 if (CONST_INT_P (XEXP (x, 0)))
10796 {
10797 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10798 return true;
10799 }
10800
10801 return false;
10802 }
10803
10804 if (mode == DImode)
10805 {
10806 *cost += COSTS_N_INSNS (1);
10807
10808 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10809 {
10810 rtx op1 = XEXP (x, 1);
10811
10812 if (speed_p)
10813 *cost += 2 * extra_cost->alu.arith;
10814
10815 if (GET_CODE (op1) == ZERO_EXTEND)
10816 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10817 0, speed_p);
10818 else
10819 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10820 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10821 0, speed_p);
10822 return true;
10823 }
10824 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10825 {
10826 if (speed_p)
10827 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10828 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10829 0, speed_p)
10830 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10831 return true;
10832 }
10833 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10834 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10835 {
10836 if (speed_p)
10837 *cost += (extra_cost->alu.arith
10838 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10839 ? extra_cost->alu.arith
10840 : extra_cost->alu.arith_shift));
10841 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10842 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10843 GET_CODE (XEXP (x, 1)), 0, speed_p));
10844 return true;
10845 }
10846
10847 if (speed_p)
10848 *cost += 2 * extra_cost->alu.arith;
10849 return false;
10850 }
10851
10852 /* Vector mode? */
10853
10854 *cost = LIBCALL_COST (2);
10855 return false;
10856
10857 case PLUS:
10858 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10859 && (mode == SFmode || !TARGET_VFP_SINGLE))
10860 {
10861 if (GET_CODE (XEXP (x, 0)) == MULT)
10862 {
10863 rtx mul_op0, mul_op1, add_op;
10864
10865 if (speed_p)
10866 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10867
10868 mul_op0 = XEXP (XEXP (x, 0), 0);
10869 mul_op1 = XEXP (XEXP (x, 0), 1);
10870 add_op = XEXP (x, 1);
10871
10872 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10873 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10874 + rtx_cost (add_op, mode, code, 0, speed_p));
10875
10876 return true;
10877 }
10878
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode != SFmode].addsub;
10881 return false;
10882 }
10883 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10884 {
10885 *cost = LIBCALL_COST (2);
10886 return false;
10887 }
10888
10889 /* Narrow modes can be synthesized in SImode, but the range
10890 of useful sub-operations is limited. Check for shift operations
10891 on one of the operands. Only left shifts can be used in the
10892 narrow modes. */
10893 if (GET_MODE_CLASS (mode) == MODE_INT
10894 && GET_MODE_SIZE (mode) < 4)
10895 {
10896 rtx shift_op, shift_reg;
10897 shift_reg = NULL;
10898
10899 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10900
10901 if (CONST_INT_P (XEXP (x, 1)))
10902 {
10903 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10904 INTVAL (XEXP (x, 1)), NULL_RTX,
10905 NULL_RTX, 1, 0);
10906 *cost = COSTS_N_INSNS (insns);
10907 if (speed_p)
10908 *cost += insns * extra_cost->alu.arith;
10909 /* Slightly penalize a narrow operation as the result may
10910 need widening. */
10911 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10912 return true;
10913 }
10914
10915 /* Slightly penalize a narrow operation as the result may
10916 need widening. */
10917 *cost += 1;
10918 if (speed_p)
10919 *cost += extra_cost->alu.arith;
10920
10921 return false;
10922 }
10923
10924 if (mode == SImode)
10925 {
10926 rtx shift_op, shift_reg;
10927
10928 if (TARGET_INT_SIMD
10929 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10930 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10931 {
10932 /* UXTA[BH] or SXTA[BH]. */
10933 if (speed_p)
10934 *cost += extra_cost->alu.extend_arith;
10935 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10936 0, speed_p)
10937 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10938 return true;
10939 }
10940
10941 rtx op0 = XEXP (x, 0);
10942 rtx op1 = XEXP (x, 1);
10943
10944 /* Handle a side effect of adding in the carry to an addition. */
10945 if (GET_CODE (op0) == PLUS
10946 && arm_carry_operation (op1, mode))
10947 {
10948 op1 = XEXP (op0, 1);
10949 op0 = XEXP (op0, 0);
10950 }
10951 else if (GET_CODE (op1) == PLUS
10952 && arm_carry_operation (op0, mode))
10953 {
10954 op0 = XEXP (op1, 0);
10955 op1 = XEXP (op1, 1);
10956 }
10957 else if (GET_CODE (op0) == PLUS)
10958 {
10959 op0 = strip_carry_operation (op0);
10960 if (swap_commutative_operands_p (op0, op1))
10961 std::swap (op0, op1);
10962 }
10963
10964 if (arm_carry_operation (op0, mode))
10965 {
10966 /* Adding the carry to a register is a canonicalization of
10967 adding 0 to the register plus the carry. */
10968 if (speed_p)
10969 *cost += extra_cost->alu.arith;
10970 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10971 return true;
10972 }
10973
10974 shift_reg = NULL;
10975 shift_op = shifter_op_p (op0, &shift_reg);
10976 if (shift_op != NULL)
10977 {
10978 if (shift_reg)
10979 {
10980 if (speed_p)
10981 *cost += extra_cost->alu.arith_shift_reg;
10982 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10983 }
10984 else if (speed_p)
10985 *cost += extra_cost->alu.arith_shift;
10986
10987 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10988 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10989 return true;
10990 }
10991
10992 if (GET_CODE (op0) == MULT)
10993 {
10994 rtx mul_op = op0;
10995
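          /* The condition below accepts a 16x16->32 multiply feeding the
             accumulate: each multiply operand must be either a SIGN_EXTEND
             (bottom half-word) or an ASHIFTRT by 16 (top half-word), i.e.
             the SMLA[BT][BT] forms.  */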
10996 if (TARGET_DSP_MULTIPLY
10997 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10998 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10999 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11000 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11001 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11002 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11003 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11004 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11005 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11006 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11007 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11008 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11009 == 16))))))
11010 {
11011 /* SMLA[BT][BT]. */
11012 if (speed_p)
11013 *cost += extra_cost->mult[0].extend_add;
11014 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11015 SIGN_EXTEND, 0, speed_p)
11016 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11017 SIGN_EXTEND, 0, speed_p)
11018 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11019 return true;
11020 }
11021
11022 if (speed_p)
11023 *cost += extra_cost->mult[0].add;
11024 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11025 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11026 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11027 return true;
11028 }
11029
11030 if (CONST_INT_P (op1))
11031 {
11032 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11033 INTVAL (op1), NULL_RTX,
11034 NULL_RTX, 1, 0);
11035 *cost = COSTS_N_INSNS (insns);
11036 if (speed_p)
11037 *cost += insns * extra_cost->alu.arith;
11038 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11039 return true;
11040 }
11041
11042 if (speed_p)
11043 *cost += extra_cost->alu.arith;
11044
11045 /* Don't recurse here because we want to test the operands
11046 without any carry operation. */
11047 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11048 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11049 return true;
11050 }
11051
11052 if (mode == DImode)
11053 {
11054 if (GET_CODE (XEXP (x, 0)) == MULT
11055 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11056 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11057 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11058 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11059 {
11060 if (speed_p)
11061 *cost += extra_cost->mult[1].extend_add;
11062 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11063 ZERO_EXTEND, 0, speed_p)
11064 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11065 ZERO_EXTEND, 0, speed_p)
11066 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11067 return true;
11068 }
11069
11070 *cost += COSTS_N_INSNS (1);
11071
11072 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11073 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11074 {
11075 if (speed_p)
11076 *cost += (extra_cost->alu.arith
11077 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11078 ? extra_cost->alu.arith
11079 : extra_cost->alu.arith_shift));
11080
11081 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11082 0, speed_p)
11083 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11084 return true;
11085 }
11086
11087 if (speed_p)
11088 *cost += 2 * extra_cost->alu.arith;
11089 return false;
11090 }
11091
11092 /* Vector mode? */
11093 *cost = LIBCALL_COST (2);
11094 return false;
11095 case IOR:
11096 {
11097 rtx sub0, sub1;
11098 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11099 {
11100 if (speed_p)
11101 *cost += extra_cost->alu.rev;
11102
11103 return true;
11104 }
11105 else if (mode == SImode && arm_arch_thumb2
11106 && arm_bfi_p (x, &sub0, &sub1))
11107 {
11108 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11109 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11110 if (speed_p)
11111 *cost += extra_cost->alu.bfi;
11112
11113 return true;
11114 }
11115 }
11116
11117 /* Fall through. */
11118 case AND: case XOR:
11119 if (mode == SImode)
11120 {
11121 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11122 rtx op0 = XEXP (x, 0);
11123 rtx shift_op, shift_reg;
11124
11125 if (subcode == NOT
11126 && (code == AND
11127 || (code == IOR && TARGET_THUMB2)))
11128 op0 = XEXP (op0, 0);
11129
11130 shift_reg = NULL;
11131 shift_op = shifter_op_p (op0, &shift_reg);
11132 if (shift_op != NULL)
11133 {
11134 if (shift_reg)
11135 {
11136 if (speed_p)
11137 *cost += extra_cost->alu.log_shift_reg;
11138 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11139 }
11140 else if (speed_p)
11141 *cost += extra_cost->alu.log_shift;
11142
11143 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11144 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11145 return true;
11146 }
11147
11148 if (CONST_INT_P (XEXP (x, 1)))
11149 {
11150 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11151 INTVAL (XEXP (x, 1)), NULL_RTX,
11152 NULL_RTX, 1, 0);
11153
11154 *cost = COSTS_N_INSNS (insns);
11155 if (speed_p)
11156 *cost += insns * extra_cost->alu.logical;
11157 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11158 return true;
11159 }
11160
11161 if (speed_p)
11162 *cost += extra_cost->alu.logical;
11163 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11164 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11165 return true;
11166 }
11167
11168 if (mode == DImode)
11169 {
11170 rtx op0 = XEXP (x, 0);
11171 enum rtx_code subcode = GET_CODE (op0);
11172
11173 *cost += COSTS_N_INSNS (1);
11174
11175 if (subcode == NOT
11176 && (code == AND
11177 || (code == IOR && TARGET_THUMB2)))
11178 op0 = XEXP (op0, 0);
11179
11180 if (GET_CODE (op0) == ZERO_EXTEND)
11181 {
11182 if (speed_p)
11183 *cost += 2 * extra_cost->alu.logical;
11184
11185 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11186 0, speed_p)
11187 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11188 return true;
11189 }
11190 else if (GET_CODE (op0) == SIGN_EXTEND)
11191 {
11192 if (speed_p)
11193 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11194
11195 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11196 0, speed_p)
11197 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11198 return true;
11199 }
11200
11201 if (speed_p)
11202 *cost += 2 * extra_cost->alu.logical;
11203
11204 return true;
11205 }
11206 /* Vector mode? */
11207
11208 *cost = LIBCALL_COST (2);
11209 return false;
11210
11211 case MULT:
11212 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11213 && (mode == SFmode || !TARGET_VFP_SINGLE))
11214 {
11215 rtx op0 = XEXP (x, 0);
11216
11217 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11218 op0 = XEXP (op0, 0);
11219
11220 if (speed_p)
11221 *cost += extra_cost->fp[mode != SFmode].mult;
11222
11223 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11224 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11225 return true;
11226 }
11227 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11228 {
11229 *cost = LIBCALL_COST (2);
11230 return false;
11231 }
11232
11233 if (mode == SImode)
11234 {
11235 if (TARGET_DSP_MULTIPLY
11236 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11237 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11238 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11239 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11240 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11241 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11242 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11243 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11244 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11245 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11246 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11247 && (INTVAL (XEXP (XEXP (x, 1), 1))
11248 == 16))))))
11249 {
11250 /* SMUL[TB][TB]. */
11251 if (speed_p)
11252 *cost += extra_cost->mult[0].extend;
11253 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11254 SIGN_EXTEND, 0, speed_p);
11255 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11256 SIGN_EXTEND, 1, speed_p);
11257 return true;
11258 }
11259 if (speed_p)
11260 *cost += extra_cost->mult[0].simple;
11261 return false;
11262 }
11263
11264 if (mode == DImode)
11265 {
11266 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11267 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11268 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11269 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11270 {
11271 if (speed_p)
11272 *cost += extra_cost->mult[1].extend;
11273 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11274 ZERO_EXTEND, 0, speed_p)
11275 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11276 ZERO_EXTEND, 0, speed_p));
11277 return true;
11278 }
11279
11280 *cost = LIBCALL_COST (2);
11281 return false;
11282 }
11283
11284 /* Vector mode? */
11285 *cost = LIBCALL_COST (2);
11286 return false;
11287
11288 case NEG:
11289 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11290 && (mode == SFmode || !TARGET_VFP_SINGLE))
11291 {
11292 if (GET_CODE (XEXP (x, 0)) == MULT)
11293 {
11294 /* VNMUL. */
11295 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11296 return true;
11297 }
11298
11299 if (speed_p)
11300 *cost += extra_cost->fp[mode != SFmode].neg;
11301
11302 return false;
11303 }
11304 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11305 {
11306 *cost = LIBCALL_COST (1);
11307 return false;
11308 }
11309
11310 if (mode == SImode)
11311 {
11312 if (GET_CODE (XEXP (x, 0)) == ABS)
11313 {
11314 *cost += COSTS_N_INSNS (1);
11315 /* Assume the non-flag-changing variant. */
11316 if (speed_p)
11317 *cost += (extra_cost->alu.log_shift
11318 + extra_cost->alu.arith_shift);
11319 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11320 return true;
11321 }
11322
11323 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11324 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11325 {
11326 *cost += COSTS_N_INSNS (1);
11327 /* No extra cost for MOV imm and MVN imm. */
11328 /* If the comparison op is using the flags, there's no further
11329 cost, otherwise we need to add the cost of the comparison. */
11330 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11331 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11332 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11333 {
11334 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11335 *cost += (COSTS_N_INSNS (1)
11336 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11337 0, speed_p)
11338 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11339 1, speed_p));
11340 if (speed_p)
11341 *cost += extra_cost->alu.arith;
11342 }
11343 return true;
11344 }
11345
11346 if (speed_p)
11347 *cost += extra_cost->alu.arith;
11348 return false;
11349 }
11350
11351 if (GET_MODE_CLASS (mode) == MODE_INT
11352 && GET_MODE_SIZE (mode) < 4)
11353 {
11354 /* Slightly disparage, as we might need an extend operation. */
11355 *cost += 1;
11356 if (speed_p)
11357 *cost += extra_cost->alu.arith;
11358 return false;
11359 }
11360
11361 if (mode == DImode)
11362 {
11363 *cost += COSTS_N_INSNS (1);
11364 if (speed_p)
11365 *cost += 2 * extra_cost->alu.arith;
11366 return false;
11367 }
11368
11369 /* Vector mode? */
11370 *cost = LIBCALL_COST (1);
11371 return false;
11372
11373 case NOT:
11374 if (mode == SImode)
11375 {
11376 rtx shift_op;
11377 rtx shift_reg = NULL;
11378
11379 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11380
11381 if (shift_op)
11382 {
11383 if (shift_reg != NULL)
11384 {
11385 if (speed_p)
11386 *cost += extra_cost->alu.log_shift_reg;
11387 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11388 }
11389 else if (speed_p)
11390 *cost += extra_cost->alu.log_shift;
11391 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11392 return true;
11393 }
11394
11395 if (speed_p)
11396 *cost += extra_cost->alu.logical;
11397 return false;
11398 }
11399 if (mode == DImode)
11400 {
11401 *cost += COSTS_N_INSNS (1);
11402 return false;
11403 }
11404
11405 /* Vector mode? */
11406
11407 *cost += LIBCALL_COST (1);
11408 return false;
11409
11410 case IF_THEN_ELSE:
11411 {
11412 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11413 {
11414 *cost += COSTS_N_INSNS (3);
11415 return true;
11416 }
11417 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11418 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11419
11420 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11421 /* Assume that if one arm of the if_then_else is a register,
11422 it will be tied with the result, eliminating the
11423 conditional insn. */
11424 if (REG_P (XEXP (x, 1)))
11425 *cost += op2cost;
11426 else if (REG_P (XEXP (x, 2)))
11427 *cost += op1cost;
11428 else
11429 {
11430 if (speed_p)
11431 {
11432 if (extra_cost->alu.non_exec_costs_exec)
11433 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11434 else
11435 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11436 }
11437 else
11438 *cost += op1cost + op2cost;
11439 }
11440 }
11441 return true;
11442
11443 case COMPARE:
11444 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11445 *cost = 0;
11446 else
11447 {
11448 machine_mode op0mode;
11449 /* We'll mostly assume that the cost of a compare is the cost of the
11450 LHS. However, there are some notable exceptions. */
11451
11452 /* Floating point compares are never done as side-effects. */
11453 op0mode = GET_MODE (XEXP (x, 0));
11454 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11455 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11456 {
11457 if (speed_p)
11458 *cost += extra_cost->fp[op0mode != SFmode].compare;
11459
11460 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11461 {
11462 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11463 return true;
11464 }
11465
11466 return false;
11467 }
11468 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11469 {
11470 *cost = LIBCALL_COST (2);
11471 return false;
11472 }
11473
11474 /* DImode compares normally take two insns. */
11475 if (op0mode == DImode)
11476 {
11477 *cost += COSTS_N_INSNS (1);
11478 if (speed_p)
11479 *cost += 2 * extra_cost->alu.arith;
11480 return false;
11481 }
11482
11483 if (op0mode == SImode)
11484 {
11485 rtx shift_op;
11486 rtx shift_reg;
11487
11488 if (XEXP (x, 1) == const0_rtx
11489 && !(REG_P (XEXP (x, 0))
11490 || (GET_CODE (XEXP (x, 0)) == SUBREG
11491 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11492 {
11493 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11494
11495 /* Multiply operations that set the flags are often
11496 significantly more expensive. */
11497 if (speed_p
11498 && GET_CODE (XEXP (x, 0)) == MULT
11499 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11500 *cost += extra_cost->mult[0].flag_setting;
11501
11502 if (speed_p
11503 && GET_CODE (XEXP (x, 0)) == PLUS
11504 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11505 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11506 0), 1), mode))
11507 *cost += extra_cost->mult[0].flag_setting;
11508 return true;
11509 }
11510
11511 shift_reg = NULL;
11512 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11513 if (shift_op != NULL)
11514 {
11515 if (shift_reg != NULL)
11516 {
11517 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11518 1, speed_p);
11519 if (speed_p)
11520 *cost += extra_cost->alu.arith_shift_reg;
11521 }
11522 else if (speed_p)
11523 *cost += extra_cost->alu.arith_shift;
11524 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11525 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11526 return true;
11527 }
11528
11529 if (speed_p)
11530 *cost += extra_cost->alu.arith;
11531 if (CONST_INT_P (XEXP (x, 1))
11532 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11533 {
11534 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11535 return true;
11536 }
11537 return false;
11538 }
11539
11540 /* Vector mode? */
11541
11542 *cost = LIBCALL_COST (2);
11543 return false;
11544 }
11545 return true;
11546
11547 case EQ:
11548 case GE:
11549 case GT:
11550 case LE:
11551 case LT:
11552 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11553 vcle and vclt). */
11554 if (TARGET_NEON
11555 && TARGET_HARD_FLOAT
11556 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11557 && (XEXP (x, 1) == CONST0_RTX (mode)))
11558 {
11559 *cost = 0;
11560 return true;
11561 }
11562
11563 /* Fall through. */
11564 case NE:
11565 case LTU:
11566 case LEU:
11567 case GEU:
11568 case GTU:
11569 case ORDERED:
11570 case UNORDERED:
11571 case UNEQ:
11572 case UNLE:
11573 case UNLT:
11574 case UNGE:
11575 case UNGT:
11576 case LTGT:
11577 if (outer_code == SET)
11578 {
11579 /* Is it a store-flag operation? */
11580 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11581 && XEXP (x, 1) == const0_rtx)
11582 {
11583 /* Thumb also needs an IT insn. */
11584 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11585 return true;
11586 }
11587 if (XEXP (x, 1) == const0_rtx)
11588 {
11589 switch (code)
11590 {
11591 case LT:
11592 /* LSR Rd, Rn, #31. */
11593 if (speed_p)
11594 *cost += extra_cost->alu.shift;
11595 break;
11596
11597 case EQ:
11598 /* RSBS T1, Rn, #0
11599 ADC Rd, Rn, T1. */
11600
11601 case NE:
11602 /* SUBS T1, Rn, #1
11603 SBC Rd, Rn, T1. */
11604 *cost += COSTS_N_INSNS (1);
11605 break;
11606
11607 case LE:
11608 /* RSBS T1, Rn, Rn, LSR #31
11609 ADC Rd, Rn, T1. */
11610 *cost += COSTS_N_INSNS (1);
11611 if (speed_p)
11612 *cost += extra_cost->alu.arith_shift;
11613 break;
11614
11615 case GT:
11616 /* RSB Rd, Rn, Rn, ASR #1
11617 LSR Rd, Rd, #31. */
11618 *cost += COSTS_N_INSNS (1);
11619 if (speed_p)
11620 *cost += (extra_cost->alu.arith_shift
11621 + extra_cost->alu.shift);
11622 break;
11623
11624 case GE:
11625 /* ASR Rd, Rn, #31
11626 ADD Rd, Rn, #1. */
11627 *cost += COSTS_N_INSNS (1);
11628 if (speed_p)
11629 *cost += extra_cost->alu.shift;
11630 break;
11631
11632 default:
11633 /* Remaining cases are either meaningless or would take
11634 three insns anyway. */
11635 *cost = COSTS_N_INSNS (3);
11636 break;
11637 }
11638 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11639 return true;
11640 }
11641 else
11642 {
11643 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11644 if (CONST_INT_P (XEXP (x, 1))
11645 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11646 {
11647 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11648 return true;
11649 }
11650
11651 return false;
11652 }
11653 }
11654 /* Not directly inside a set. If it involves the condition code
11655 register it must be the condition for a branch, cond_exec or
11656 I_T_E operation. Since the comparison is performed elsewhere
11657 this is just the control part which has no additional
11658 cost. */
11659 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11660 && XEXP (x, 1) == const0_rtx)
11661 {
11662 *cost = 0;
11663 return true;
11664 }
11665 return false;
11666
11667 case ABS:
11668 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11669 && (mode == SFmode || !TARGET_VFP_SINGLE))
11670 {
11671 if (speed_p)
11672 *cost += extra_cost->fp[mode != SFmode].neg;
11673
11674 return false;
11675 }
11676 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11677 {
11678 *cost = LIBCALL_COST (1);
11679 return false;
11680 }
11681
11682 if (mode == SImode)
11683 {
11684 if (speed_p)
11685 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11686 return false;
11687 }
11688 /* Vector mode? */
11689 *cost = LIBCALL_COST (1);
11690 return false;
11691
11692 case SIGN_EXTEND:
11693 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11694 && MEM_P (XEXP (x, 0)))
11695 {
11696 if (mode == DImode)
11697 *cost += COSTS_N_INSNS (1);
11698
11699 if (!speed_p)
11700 return true;
11701
11702 if (GET_MODE (XEXP (x, 0)) == SImode)
11703 *cost += extra_cost->ldst.load;
11704 else
11705 *cost += extra_cost->ldst.load_sign_extend;
11706
11707 if (mode == DImode)
11708 *cost += extra_cost->alu.shift;
11709
11710 return true;
11711 }
11712
11713 /* Widening from less than 32 bits requires an extend operation. */
11714 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11715 {
11716 /* We have SXTB/SXTH. */
11717 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11718 if (speed_p)
11719 *cost += extra_cost->alu.extend;
11720 }
11721 else if (GET_MODE (XEXP (x, 0)) != SImode)
11722 {
11723 /* Needs two shifts. */
11724 *cost += COSTS_N_INSNS (1);
11725 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11726 if (speed_p)
11727 *cost += 2 * extra_cost->alu.shift;
11728 }
11729
11730 /* Widening beyond 32 bits requires one more insn. */
11731 if (mode == DImode)
11732 {
11733 *cost += COSTS_N_INSNS (1);
11734 if (speed_p)
11735 *cost += extra_cost->alu.shift;
11736 }
11737
11738 return true;
11739
11740 case ZERO_EXTEND:
11741 if ((arm_arch4
11742 || GET_MODE (XEXP (x, 0)) == SImode
11743 || GET_MODE (XEXP (x, 0)) == QImode)
11744 && MEM_P (XEXP (x, 0)))
11745 {
11746 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11747
11748 if (mode == DImode)
11749 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11750
11751 return true;
11752 }
11753
11754 /* Widening from less than 32 bits requires an extend operation. */
11755 if (GET_MODE (XEXP (x, 0)) == QImode)
11756 {
11757 /* UXTB can be a shorter instruction in Thumb2, but it might
11758 be slower than the AND Rd, Rn, #255 alternative. When
11759 optimizing for speed it should never be slower to use
11760 AND, and we don't really model 16-bit vs 32-bit insns
11761 here. */
11762 if (speed_p)
11763 *cost += extra_cost->alu.logical;
11764 }
11765 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11766 {
11767 /* We have UXTB/UXTH. */
11768 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11769 if (speed_p)
11770 *cost += extra_cost->alu.extend;
11771 }
11772 else if (GET_MODE (XEXP (x, 0)) != SImode)
11773 {
11774 /* Needs two shifts. It's marginally preferable to use
11775 shifts rather than two BIC instructions as the second
11776 shift may merge with a subsequent insn as a shifter
11777 op. */
11778 *cost = COSTS_N_INSNS (2);
11779 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11780 if (speed_p)
11781 *cost += 2 * extra_cost->alu.shift;
11782 }
11783
11784 /* Widening beyond 32 bits requires one more insn. */
11785 if (mode == DImode)
11786 {
11787 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11788 }
11789
11790 return true;
11791
11792 case CONST_INT:
11793 *cost = 0;
11794 /* CONST_INT has no mode, so we cannot tell for sure how many
11795 insns are really going to be needed. The best we can do is
11796 look at the value passed. If it fits in SImode, then assume
11797 that's the mode it will be used for. Otherwise assume it
11798 will be used in DImode. */
11799 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11800 mode = SImode;
11801 else
11802 mode = DImode;
11803
11804 /* Avoid blowing up in arm_gen_constant (). */
11805 if (!(outer_code == PLUS
11806 || outer_code == AND
11807 || outer_code == IOR
11808 || outer_code == XOR
11809 || outer_code == MINUS))
11810 outer_code = SET;
11811
11812 const_int_cost:
11813 if (mode == SImode)
11814 {
11815 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11816 INTVAL (x), NULL, NULL,
11817 0, 0));
11818 /* Extra costs? */
11819 }
11820 else
11821 {
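          /* A DImode (or wider) constant is costed as its two 32-bit halves
             synthesized independently.  Illustration only: 0x300000001 is
             treated as synthesizing 1 for the low word and 3 for the high
             word.  */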
11822 *cost += COSTS_N_INSNS (arm_gen_constant
11823 (outer_code, SImode, NULL,
11824 trunc_int_for_mode (INTVAL (x), SImode),
11825 NULL, NULL, 0, 0)
11826 + arm_gen_constant (outer_code, SImode, NULL,
11827 INTVAL (x) >> 32, NULL,
11828 NULL, 0, 0));
11829 /* Extra costs? */
11830 }
11831
11832 return true;
11833
11834 case CONST:
11835 case LABEL_REF:
11836 case SYMBOL_REF:
11837 if (speed_p)
11838 {
11839 if (arm_arch_thumb2 && !flag_pic)
11840 *cost += COSTS_N_INSNS (1);
11841 else
11842 *cost += extra_cost->ldst.load;
11843 }
11844 else
11845 *cost += COSTS_N_INSNS (1);
11846
11847 if (flag_pic)
11848 {
11849 *cost += COSTS_N_INSNS (1);
11850 if (speed_p)
11851 *cost += extra_cost->alu.arith;
11852 }
11853
11854 return true;
11855
11856 case CONST_FIXED:
11857 *cost = COSTS_N_INSNS (4);
11858 /* Fixme. */
11859 return true;
11860
11861 case CONST_DOUBLE:
11862 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11863 && (mode == SFmode || !TARGET_VFP_SINGLE))
11864 {
11865 if (vfp3_const_double_rtx (x))
11866 {
11867 if (speed_p)
11868 *cost += extra_cost->fp[mode == DFmode].fpconst;
11869 return true;
11870 }
11871
11872 if (speed_p)
11873 {
11874 if (mode == DFmode)
11875 *cost += extra_cost->ldst.loadd;
11876 else
11877 *cost += extra_cost->ldst.loadf;
11878 }
11879 else
11880 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11881
11882 return true;
11883 }
11884 *cost = COSTS_N_INSNS (4);
11885 return true;
11886
11887 case CONST_VECTOR:
11888 /* Fixme. */
11889 if (((TARGET_NEON && TARGET_HARD_FLOAT
11890 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11891 || TARGET_HAVE_MVE)
11892 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11893 *cost = COSTS_N_INSNS (1);
11894 else
11895 *cost = COSTS_N_INSNS (4);
11896 return true;
11897
11898 case HIGH:
11899 case LO_SUM:
11900 /* When optimizing for size, we prefer constant pool entries to
11901 MOVW/MOVT pairs, so bump the cost of these slightly. */
11902 if (!speed_p)
11903 *cost += 1;
11904 return true;
11905
11906 case CLZ:
11907 if (speed_p)
11908 *cost += extra_cost->alu.clz;
11909 return false;
11910
11911 case SMIN:
11912 if (XEXP (x, 1) == const0_rtx)
11913 {
11914 if (speed_p)
11915 *cost += extra_cost->alu.log_shift;
11916 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11917 return true;
11918 }
11919 /* Fall through. */
11920 case SMAX:
11921 case UMIN:
11922 case UMAX:
11923 *cost += COSTS_N_INSNS (1);
11924 return false;
11925
11926 case TRUNCATE:
11927 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11928 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11929 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11931 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11932 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11933 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11934 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11935 == ZERO_EXTEND))))
11936 {
11937 if (speed_p)
11938 *cost += extra_cost->mult[1].extend;
11939 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11940 ZERO_EXTEND, 0, speed_p)
11941 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11942 ZERO_EXTEND, 0, speed_p));
11943 return true;
11944 }
11945 *cost = LIBCALL_COST (1);
11946 return false;
11947
11948 case UNSPEC_VOLATILE:
11949 case UNSPEC:
11950 return arm_unspec_cost (x, outer_code, speed_p, cost);
11951
11952 case PC:
11953 /* Reading the PC is like reading any other register. Writing it
11954 is more expensive, but we take that into account elsewhere. */
11955 *cost = 0;
11956 return true;
11957
11958 case ZERO_EXTRACT:
11959 /* TODO: Simple zero_extract of bottom bits using AND. */
11960 /* Fall through. */
11961 case SIGN_EXTRACT:
11962 if (arm_arch6
11963 && mode == SImode
11964 && CONST_INT_P (XEXP (x, 1))
11965 && CONST_INT_P (XEXP (x, 2)))
11966 {
11967 if (speed_p)
11968 *cost += extra_cost->alu.bfx;
11969 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11970 return true;
11971 }
11972 /* Without UBFX/SBFX, need to resort to shift operations. */
11973 *cost += COSTS_N_INSNS (1);
11974 if (speed_p)
11975 *cost += 2 * extra_cost->alu.shift;
11976 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11977 return true;
11978
11979 case FLOAT_EXTEND:
11980 if (TARGET_HARD_FLOAT)
11981 {
11982 if (speed_p)
11983 *cost += extra_cost->fp[mode == DFmode].widen;
11984 if (!TARGET_VFP5
11985 && GET_MODE (XEXP (x, 0)) == HFmode)
11986 {
11987 /* Pre v8, widening HF->DF is a two-step process, first
11988 widening to SFmode. */
11989 *cost += COSTS_N_INSNS (1);
11990 if (speed_p)
11991 *cost += extra_cost->fp[0].widen;
11992 }
11993 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11994 return true;
11995 }
11996
11997 *cost = LIBCALL_COST (1);
11998 return false;
11999
12000 case FLOAT_TRUNCATE:
12001 if (TARGET_HARD_FLOAT)
12002 {
12003 if (speed_p)
12004 *cost += extra_cost->fp[mode == DFmode].narrow;
12005 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12006 return true;
12007 /* Vector modes? */
12008 }
12009 *cost = LIBCALL_COST (1);
12010 return false;
12011
12012 case FMA:
12013 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12014 {
12015 rtx op0 = XEXP (x, 0);
12016 rtx op1 = XEXP (x, 1);
12017 rtx op2 = XEXP (x, 2);
12018
12019
12020 /* vfms or vfnma. */
12021 if (GET_CODE (op0) == NEG)
12022 op0 = XEXP (op0, 0);
12023
12024 /* vfnms or vfnma. */
12025 if (GET_CODE (op2) == NEG)
12026 op2 = XEXP (op2, 0);
12027
12028 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12029 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12030 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12031
12032 if (speed_p)
12033 *cost += extra_cost->fp[mode == DFmode].fma;
12034
12035 return true;
12036 }
12037
12038 *cost = LIBCALL_COST (3);
12039 return false;
12040
12041 case FIX:
12042 case UNSIGNED_FIX:
12043 if (TARGET_HARD_FLOAT)
12044 {
12045 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12046 a vcvt fixed-point conversion. */
12047 if (code == FIX && mode == SImode
12048 && GET_CODE (XEXP (x, 0)) == FIX
12049 && GET_MODE (XEXP (x, 0)) == SFmode
12050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12051 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12052 > 0)
12053 {
12054 if (speed_p)
12055 *cost += extra_cost->fp[0].toint;
12056
12057 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12058 code, 0, speed_p);
12059 return true;
12060 }
12061
12062 if (GET_MODE_CLASS (mode) == MODE_INT)
12063 {
12064 mode = GET_MODE (XEXP (x, 0));
12065 if (speed_p)
12066 *cost += extra_cost->fp[mode == DFmode].toint;
12067 /* Strip off the 'cost' of rounding towards zero. */
12068 if (GET_CODE (XEXP (x, 0)) == FIX)
12069 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12070 0, speed_p);
12071 else
12072 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12073 /* ??? Increase the cost to deal with transferring from
12074 FP -> CORE registers? */
12075 return true;
12076 }
12077 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12078 && TARGET_VFP5)
12079 {
12080 if (speed_p)
12081 *cost += extra_cost->fp[mode == DFmode].roundint;
12082 return false;
12083 }
12084 /* Vector costs? */
12085 }
12086 *cost = LIBCALL_COST (1);
12087 return false;
12088
12089 case FLOAT:
12090 case UNSIGNED_FLOAT:
12091 if (TARGET_HARD_FLOAT)
12092 {
12093 /* ??? Increase the cost to deal with transferring from CORE
12094 -> FP registers? */
12095 if (speed_p)
12096 *cost += extra_cost->fp[mode == DFmode].fromint;
12097 return false;
12098 }
12099 *cost = LIBCALL_COST (1);
12100 return false;
12101
12102 case CALL:
12103 return true;
12104
12105 case ASM_OPERANDS:
12106 {
12107 /* Just a guess: the number of instructions in the asm plus one
12108 insn per input. Always a minimum of COSTS_N_INSNS (1),
12109 though (see PR60663). */
12110 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12111 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12112
12113 *cost = COSTS_N_INSNS (asm_length + num_operands);
12114 return true;
12115 }
12116 default:
12117 if (mode != VOIDmode)
12118 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12119 else
12120 *cost = COSTS_N_INSNS (4); /* Who knows? */
12121 return false;
12122 }
12123 }
12124
12125 #undef HANDLE_NARROW_SHIFT_ARITH
12126
12127 /* RTX costs entry point. */
12128
12129 static bool
12130 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12131 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12132 {
12133 bool result;
12134 int code = GET_CODE (x);
12135 gcc_assert (current_tune->insn_extra_cost);
12136
12137 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12138 (enum rtx_code) outer_code,
12139 current_tune->insn_extra_cost,
12140 total, speed);
12141
12142 if (dump_file && arm_verbose_cost)
12143 {
12144 print_rtl_single (dump_file, x);
12145 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12146 *total, result ? "final" : "partial");
12147 }
12148 return result;
12149 }
12150
12151 static int
12152 arm_insn_cost (rtx_insn *insn, bool speed)
12153 {
12154 int cost;
12155
12156 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12157 will likely disappear during register allocation. */
12158 if (!reload_completed
12159 && GET_CODE (PATTERN (insn)) == SET
12160 && REG_P (SET_DEST (PATTERN (insn)))
12161 && REG_P (SET_SRC (PATTERN (insn))))
12162 return 2;
12163 cost = pattern_cost (PATTERN (insn), speed);
12164 /* If the cost is zero, then it's likely a complex insn. We don't want the
12165 cost of these to be less than something we know about. */
12166 return cost ? cost : COSTS_N_INSNS (2);
12167 }
12168
12169 /* All address computations that can be done are free, but rtx cost returns
12170 the same for practically all of them. So we weight the different types
12171 of address here in the order (most preferred first):
12172 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
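/* For instance (illustrative only): [Rn, Rm, lsl #2] is a PLUS with an
   arithmetic operand and scores 3, [Rn, #imm] scores 2, and a bare
   SYMBOL_REF scores 10.  */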
12173 static inline int
12174 arm_arm_address_cost (rtx x)
12175 {
12176 enum rtx_code c = GET_CODE (x);
12177
12178 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12179 return 0;
12180 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12181 return 10;
12182
12183 if (c == PLUS)
12184 {
12185 if (CONST_INT_P (XEXP (x, 1)))
12186 return 2;
12187
12188 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12189 return 3;
12190
12191 return 4;
12192 }
12193
12194 return 6;
12195 }
12196
12197 static inline int
12198 arm_thumb_address_cost (rtx x)
12199 {
12200 enum rtx_code c = GET_CODE (x);
12201
12202 if (c == REG)
12203 return 1;
12204 if (c == PLUS
12205 && REG_P (XEXP (x, 0))
12206 && CONST_INT_P (XEXP (x, 1)))
12207 return 1;
12208
12209 return 2;
12210 }
12211
12212 static int
12213 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12214 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12215 {
12216 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12217 }
12218
12219 /* Adjust cost hook for XScale. */
12220 static bool
12221 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12222 int * cost)
12223 {
12224 /* Some true dependencies can have a higher cost depending
12225 on precisely how certain input operands are used. */
12226 if (dep_type == 0
12227 && recog_memoized (insn) >= 0
12228 && recog_memoized (dep) >= 0)
12229 {
12230 int shift_opnum = get_attr_shift (insn);
12231 enum attr_type attr_type = get_attr_type (dep);
12232
12233 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12234 operand for INSN. If we have a shifted input operand and the
12235 instruction we depend on is another ALU instruction, then we may
12236 have to account for an additional stall. */
12237 if (shift_opnum != 0
12238 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12239 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12240 || attr_type == TYPE_ALUS_SHIFT_IMM
12241 || attr_type == TYPE_LOGIC_SHIFT_IMM
12242 || attr_type == TYPE_LOGICS_SHIFT_IMM
12243 || attr_type == TYPE_ALU_SHIFT_REG
12244 || attr_type == TYPE_ALUS_SHIFT_REG
12245 || attr_type == TYPE_LOGIC_SHIFT_REG
12246 || attr_type == TYPE_LOGICS_SHIFT_REG
12247 || attr_type == TYPE_MOV_SHIFT
12248 || attr_type == TYPE_MVN_SHIFT
12249 || attr_type == TYPE_MOV_SHIFT_REG
12250 || attr_type == TYPE_MVN_SHIFT_REG))
12251 {
12252 rtx shifted_operand;
12253 int opno;
12254
12255 /* Get the shifted operand. */
12256 extract_insn (insn);
12257 shifted_operand = recog_data.operand[shift_opnum];
12258
12259 /* Iterate over all the operands in DEP. If we write an operand
12260 that overlaps with SHIFTED_OPERAND, then we have to increase the
12261 cost of this dependency. */
12262 extract_insn (dep);
12263 preprocess_constraints (dep);
12264 for (opno = 0; opno < recog_data.n_operands; opno++)
12265 {
12266 /* We can ignore strict inputs. */
12267 if (recog_data.operand_type[opno] == OP_IN)
12268 continue;
12269
12270 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12271 shifted_operand))
12272 {
12273 *cost = 2;
12274 return false;
12275 }
12276 }
12277 }
12278 }
12279 return true;
12280 }
12281
12282 /* Adjust cost hook for Cortex A9. */
12283 static bool
12284 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12285 int * cost)
12286 {
12287 switch (dep_type)
12288 {
12289 case REG_DEP_ANTI:
12290 *cost = 0;
12291 return false;
12292
12293 case REG_DEP_TRUE:
12294 case REG_DEP_OUTPUT:
12295 if (recog_memoized (insn) >= 0
12296 && recog_memoized (dep) >= 0)
12297 {
12298 if (GET_CODE (PATTERN (insn)) == SET)
12299 {
12300 if (GET_MODE_CLASS
12301 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12302 || GET_MODE_CLASS
12303 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12304 {
12305 enum attr_type attr_type_insn = get_attr_type (insn);
12306 enum attr_type attr_type_dep = get_attr_type (dep);
12307
12308 /* By default all dependencies of the form
12309 s0 = s0 <op> s1
12310 s0 = s0 <op> s2
12311 have an extra latency of 1 cycle because
12312 of the input and output dependency in this
12313 case. However, this gets modeled as a true
12314 dependency and hence all these checks. */
12315 if (REG_P (SET_DEST (PATTERN (insn)))
12316 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12317 {
12318 /* FMACS is a special case where the dependent
12319 instruction can be issued 3 cycles before
12320 the normal latency in case of an output
12321 dependency. */
12322 if ((attr_type_insn == TYPE_FMACS
12323 || attr_type_insn == TYPE_FMACD)
12324 && (attr_type_dep == TYPE_FMACS
12325 || attr_type_dep == TYPE_FMACD))
12326 {
12327 if (dep_type == REG_DEP_OUTPUT)
12328 *cost = insn_default_latency (dep) - 3;
12329 else
12330 *cost = insn_default_latency (dep);
12331 return false;
12332 }
12333 else
12334 {
12335 if (dep_type == REG_DEP_OUTPUT)
12336 *cost = insn_default_latency (dep) + 1;
12337 else
12338 *cost = insn_default_latency (dep);
12339 }
12340 return false;
12341 }
12342 }
12343 }
12344 }
12345 break;
12346
12347 default:
12348 gcc_unreachable ();
12349 }
12350
12351 return true;
12352 }
12353
12354 /* Adjust cost hook for FA726TE. */
12355 static bool
12356 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12357 int * cost)
12358 {
12359 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
12360 predicated) has a penalty of 3. */
12361 if (dep_type == REG_DEP_TRUE
12362 && recog_memoized (insn) >= 0
12363 && recog_memoized (dep) >= 0
12364 && get_attr_conds (dep) == CONDS_SET)
12365 {
12366 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12367 if (get_attr_conds (insn) == CONDS_USE
12368 && get_attr_type (insn) != TYPE_BRANCH)
12369 {
12370 *cost = 3;
12371 return false;
12372 }
12373
12374 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12375 || get_attr_conds (insn) == CONDS_USE)
12376 {
12377 *cost = 0;
12378 return false;
12379 }
12380 }
12381
12382 return true;
12383 }
12384
12385 /* Implement TARGET_REGISTER_MOVE_COST.
12386
12387 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12388 such a move is typically more expensive than a single memory access. We set
12389 the cost to less than two memory accesses so that floating
12390 point to integer conversion does not go through memory. */
12391
12392 int
12393 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12394 reg_class_t from, reg_class_t to)
12395 {
12396 if (TARGET_32BIT)
12397 {
12398 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12399 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12400 return 15;
12401 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12402 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12403 return 4;
12404 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12405 return 20;
12406 else
12407 return 2;
12408 }
12409 else
12410 {
12411 if (from == HI_REGS || to == HI_REGS)
12412 return 4;
12413 else
12414 return 2;
12415 }
12416 }
12417
12418 /* Implement TARGET_MEMORY_MOVE_COST. */
12419
12420 int
12421 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12422 bool in ATTRIBUTE_UNUSED)
12423 {
12424 if (TARGET_32BIT)
12425 return 10;
12426 else
12427 {
12428 if (GET_MODE_SIZE (mode) < 4)
12429 return 8;
12430 else
12431 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12432 }
12433 }
12434
12435 /* Vectorizer cost model implementation. */
12436
12437 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12438 static int
12439 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12440 tree vectype,
12441 int misalign ATTRIBUTE_UNUSED)
12442 {
12443 unsigned elements;
12444
12445 switch (type_of_cost)
12446 {
12447 case scalar_stmt:
12448 return current_tune->vec_costs->scalar_stmt_cost;
12449
12450 case scalar_load:
12451 return current_tune->vec_costs->scalar_load_cost;
12452
12453 case scalar_store:
12454 return current_tune->vec_costs->scalar_store_cost;
12455
12456 case vector_stmt:
12457 return current_tune->vec_costs->vec_stmt_cost;
12458
12459 case vector_load:
12460 return current_tune->vec_costs->vec_align_load_cost;
12461
12462 case vector_store:
12463 return current_tune->vec_costs->vec_store_cost;
12464
12465 case vec_to_scalar:
12466 return current_tune->vec_costs->vec_to_scalar_cost;
12467
12468 case scalar_to_vec:
12469 return current_tune->vec_costs->scalar_to_vec_cost;
12470
12471 case unaligned_load:
12472 case vector_gather_load:
12473 return current_tune->vec_costs->vec_unalign_load_cost;
12474
12475 case unaligned_store:
12476 case vector_scatter_store:
12477 return current_tune->vec_costs->vec_unalign_store_cost;
12478
12479 case cond_branch_taken:
12480 return current_tune->vec_costs->cond_taken_branch_cost;
12481
12482 case cond_branch_not_taken:
12483 return current_tune->vec_costs->cond_not_taken_branch_cost;
12484
12485 case vec_perm:
12486 case vec_promote_demote:
12487 return current_tune->vec_costs->vec_stmt_cost;
12488
12489 case vec_construct:
12490 elements = TYPE_VECTOR_SUBPARTS (vectype);
12491 return elements / 2 + 1;
12492
12493 default:
12494 gcc_unreachable ();
12495 }
12496 }
12497
12498 /* Return true if and only if this insn can dual-issue only as older. */
12499 static bool
12500 cortexa7_older_only (rtx_insn *insn)
12501 {
12502 if (recog_memoized (insn) < 0)
12503 return false;
12504
12505 switch (get_attr_type (insn))
12506 {
12507 case TYPE_ALU_DSP_REG:
12508 case TYPE_ALU_SREG:
12509 case TYPE_ALUS_SREG:
12510 case TYPE_LOGIC_REG:
12511 case TYPE_LOGICS_REG:
12512 case TYPE_ADC_REG:
12513 case TYPE_ADCS_REG:
12514 case TYPE_ADR:
12515 case TYPE_BFM:
12516 case TYPE_REV:
12517 case TYPE_MVN_REG:
12518 case TYPE_SHIFT_IMM:
12519 case TYPE_SHIFT_REG:
12520 case TYPE_LOAD_BYTE:
12521 case TYPE_LOAD_4:
12522 case TYPE_STORE_4:
12523 case TYPE_FFARITHS:
12524 case TYPE_FADDS:
12525 case TYPE_FFARITHD:
12526 case TYPE_FADDD:
12527 case TYPE_FMOV:
12528 case TYPE_F_CVT:
12529 case TYPE_FCMPS:
12530 case TYPE_FCMPD:
12531 case TYPE_FCONSTS:
12532 case TYPE_FCONSTD:
12533 case TYPE_FMULS:
12534 case TYPE_FMACS:
12535 case TYPE_FMULD:
12536 case TYPE_FMACD:
12537 case TYPE_FDIVS:
12538 case TYPE_FDIVD:
12539 case TYPE_F_MRC:
12540 case TYPE_F_MRRC:
12541 case TYPE_F_FLAG:
12542 case TYPE_F_LOADS:
12543 case TYPE_F_STORES:
12544 return true;
12545 default:
12546 return false;
12547 }
12548 }
12549
12550 /* Return true if and only if this insn can dual-issue as younger. */
12551 static bool
12552 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12553 {
12554 if (recog_memoized (insn) < 0)
12555 {
12556 if (verbose > 5)
12557 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12558 return false;
12559 }
12560
12561 switch (get_attr_type (insn))
12562 {
12563 case TYPE_ALU_IMM:
12564 case TYPE_ALUS_IMM:
12565 case TYPE_LOGIC_IMM:
12566 case TYPE_LOGICS_IMM:
12567 case TYPE_EXTEND:
12568 case TYPE_MVN_IMM:
12569 case TYPE_MOV_IMM:
12570 case TYPE_MOV_REG:
12571 case TYPE_MOV_SHIFT:
12572 case TYPE_MOV_SHIFT_REG:
12573 case TYPE_BRANCH:
12574 case TYPE_CALL:
12575 return true;
12576 default:
12577 return false;
12578 }
12579 }
12580
12581
12582 /* Look for an instruction that can dual issue only as an older
12583 instruction, and move it in front of any instructions that can
12584 dual-issue as younger, while preserving the relative order of all
12585 other instructions in the ready list. This is a heuristic to help
12586 dual-issue in later cycles, by postponing issue of more flexible
12587 instructions. This heuristic may affect dual issue opportunities
12588 in the current cycle. */
12589 static void
12590 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12591 int *n_readyp, int clock)
12592 {
12593 int i;
12594 int first_older_only = -1, first_younger = -1;
12595
12596 if (verbose > 5)
12597 fprintf (file,
12598 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12599 clock,
12600 *n_readyp);
12601
12602 /* Traverse the ready list from the head (the instruction to issue
12603 first), looking for the first instruction that can issue as
12604 younger and the first instruction that can dual-issue only as
12605 older. */
12606 for (i = *n_readyp - 1; i >= 0; i--)
12607 {
12608 rtx_insn *insn = ready[i];
12609 if (cortexa7_older_only (insn))
12610 {
12611 first_older_only = i;
12612 if (verbose > 5)
12613 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12614 break;
12615 }
12616 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12617 first_younger = i;
12618 }
12619
12620 /* Nothing to reorder because either no younger insn was found, or an insn
12621 that can dual-issue only as older appears before any insn that
12622 can dual-issue as younger. */
12623 if (first_younger == -1)
12624 {
12625 if (verbose > 5)
12626 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12627 return;
12628 }
12629
12630 /* Nothing to reorder because no older-only insn in the ready list. */
12631 if (first_older_only == -1)
12632 {
12633 if (verbose > 5)
12634 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12635 return;
12636 }
12637
12638 /* Move first_older_only insn before first_younger. */
12639 if (verbose > 5)
12640 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12641 INSN_UID(ready [first_older_only]),
12642 INSN_UID(ready [first_younger]));
12643 rtx_insn *first_older_only_insn = ready [first_older_only];
12644 for (i = first_older_only; i < first_younger; i++)
12645 {
12646 ready[i] = ready[i+1];
12647 }
12648
12649 ready[i] = first_older_only_insn;
12650 return;
12651 }
12652
12653 /* Implement TARGET_SCHED_REORDER. */
12654 static int
12655 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12656 int clock)
12657 {
12658 switch (arm_tune)
12659 {
12660 case TARGET_CPU_cortexa7:
12661 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12662 break;
12663 default:
12664 /* Do nothing for other cores. */
12665 break;
12666 }
12667
12668 return arm_issue_rate ();
12669 }
12670
12671 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12672 It corrects the value of COST based on the relationship between
12673 INSN and DEP through the dependence LINK. It returns the new
12674 value. There is a per-core adjust_cost hook to adjust scheduler costs
12675 and the per-core hook can choose to completely override the generic
12676 adjust_cost function. Only put bits of code into arm_adjust_cost that
12677 are common across all cores. */
12678 static int
12679 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12680 unsigned int)
12681 {
12682 rtx i_pat, d_pat;
12683
12684 /* When generating Thumb-1 code, we want to place flag-setting operations
12685 close to a conditional branch which depends on them, so that we can
12686 omit the comparison. */
12687 if (TARGET_THUMB1
12688 && dep_type == 0
12689 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12690 && recog_memoized (dep) >= 0
12691 && get_attr_conds (dep) == CONDS_SET)
12692 return 0;
12693
12694 if (current_tune->sched_adjust_cost != NULL)
12695 {
12696 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12697 return cost;
12698 }
12699
12700 /* XXX Is this strictly true? */
12701 if (dep_type == REG_DEP_ANTI
12702 || dep_type == REG_DEP_OUTPUT)
12703 return 0;
12704
12705 /* Call insns don't incur a stall, even if they follow a load. */
12706 if (dep_type == 0
12707 && CALL_P (insn))
12708 return 1;
12709
12710 if ((i_pat = single_set (insn)) != NULL
12711 && MEM_P (SET_SRC (i_pat))
12712 && (d_pat = single_set (dep)) != NULL
12713 && MEM_P (SET_DEST (d_pat)))
12714 {
12715 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12716 /* This is a load after a store; there is no conflict if the load reads
12717 from a cached area. Assume that loads from the stack, and from the
12718 constant pool are cached, and that others will miss. This is a
12719 hack. */
12720
12721 if ((SYMBOL_REF_P (src_mem)
12722 && CONSTANT_POOL_ADDRESS_P (src_mem))
12723 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12724 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12725 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12726 return 1;
12727 }
12728
12729 return cost;
12730 }
12731
12732 int
12733 arm_max_conditional_execute (void)
12734 {
12735 return max_insns_skipped;
12736 }
12737
12738 static int
12739 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12740 {
12741 if (TARGET_32BIT)
12742 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12743 else
12744 return (optimize > 0) ? 2 : 0;
12745 }
12746
12747 static int
12748 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12749 {
12750 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12751 }
12752
12753 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12754 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12755 sequences of non-executed instructions in IT blocks probably take the same
12756 amount of time as executed instructions (and the IT instruction itself takes
12757 space in icache). This function was experimentally determined to give good
12758 results on a popular embedded benchmark. */
12759
12760 static int
12761 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12762 {
12763 return (TARGET_32BIT && speed_p) ? 1
12764 : arm_default_branch_cost (speed_p, predictable_p);
12765 }
12766
12767 static int
12768 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12769 {
12770 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12771 }
12772
12773 static bool fp_consts_inited = false;
12774
12775 static REAL_VALUE_TYPE value_fp0;
12776
12777 static void
12778 init_fp_table (void)
12779 {
12780 REAL_VALUE_TYPE r;
12781
12782 r = REAL_VALUE_ATOF ("0", DFmode);
12783 value_fp0 = r;
12784 fp_consts_inited = true;
12785 }
12786
12787 /* Return TRUE if rtx X is a valid immediate FP constant. */
12788 int
12789 arm_const_double_rtx (rtx x)
12790 {
12791 const REAL_VALUE_TYPE *r;
12792
12793 if (!fp_consts_inited)
12794 init_fp_table ();
12795
12796 r = CONST_DOUBLE_REAL_VALUE (x);
12797 if (REAL_VALUE_MINUS_ZERO (*r))
12798 return 0;
12799
12800 if (real_equal (r, &value_fp0))
12801 return 1;
12802
12803 return 0;
12804 }
12805
12806 /* VFPv3 has a fairly wide range of representable immediates, formed from
12807 "quarter-precision" floating-point values. These can be evaluated using this
12808 formula (with ^ for exponentiation):
12809
12810 -1^s * n * 2^-r
12811
12812 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12813 16 <= n <= 31 and 0 <= r <= 7.
12814
12815 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12816
12817 - A (most-significant) is the sign bit.
12818 - BCD are the exponent (encoded as r XOR 3).
12819 - EFGH are the mantissa (encoded as n - 16).
12820 */
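/* As an illustration only (a hypothetical helper, not part of GCC): given S,
   N and R from the formula above, the encoded byte can be computed as

     static inline int
     encode_quarter_precision (int s, int n, int r)
     {
       return (s << 7) | ((r ^ 3) << 4) | (n - 16);
     }

   For example, 1.0 == 16 * 2^-4 gives s = 0, n = 16, r = 4 and hence 0x70,
   which is what vfp3_const_double_index below returns for 1.0.  */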
12821
12822 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12823 fconst[sd] instruction, or -1 if X isn't suitable. */
12824 static int
12825 vfp3_const_double_index (rtx x)
12826 {
12827 REAL_VALUE_TYPE r, m;
12828 int sign, exponent;
12829 unsigned HOST_WIDE_INT mantissa, mant_hi;
12830 unsigned HOST_WIDE_INT mask;
12831 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12832 bool fail;
12833
12834 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12835 return -1;
12836
12837 r = *CONST_DOUBLE_REAL_VALUE (x);
12838
12839 /* We can't represent these things, so detect them first. */
12840 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12841 return -1;
12842
12843 /* Extract sign, exponent and mantissa. */
12844 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12845 r = real_value_abs (&r);
12846 exponent = REAL_EXP (&r);
12847 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12848 highest (sign) bit, with a fixed binary point at bit point_pos.
12849 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12850 bits for the mantissa, this may fail (low bits would be lost). */
12851 real_ldexp (&m, &r, point_pos - exponent);
12852 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12853 mantissa = w.elt (0);
12854 mant_hi = w.elt (1);
12855
12856 /* If there are bits set in the low part of the mantissa, we can't
12857 represent this value. */
12858 if (mantissa != 0)
12859 return -1;
12860
12861 /* Now make it so that mantissa contains the most-significant bits, and move
12862 the point_pos to indicate that the least-significant bits have been
12863 discarded. */
12864 point_pos -= HOST_BITS_PER_WIDE_INT;
12865 mantissa = mant_hi;
12866
12867 /* We can permit four significant bits of mantissa only, plus a high bit
12868 which is always 1. */
12869 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12870 if ((mantissa & mask) != 0)
12871 return -1;
12872
12873 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12874 mantissa >>= point_pos - 5;
12875
12876 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12877 floating-point immediate zero with Neon using an integer-zero load, but
12878 that case is handled elsewhere.) */
12879 if (mantissa == 0)
12880 return -1;
12881
12882 gcc_assert (mantissa >= 16 && mantissa <= 31);
12883
12884 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12885 normalized significands are in the range [1, 2). (Our mantissa is shifted
12886 left 4 places at this point relative to normalized IEEE754 values). GCC
12887 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12888 REAL_EXP must be altered. */
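/* For example, 1.0 is stored internally as 0.5 * 2^1, so REAL_EXP yields 1
   here and the adjusted exponent below is 5 - 1 == 4, consistent with
   1.0 == 16 * 2^-4 in the quarter-precision formula above.  */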
12889 exponent = 5 - exponent;
12890
12891 if (exponent < 0 || exponent > 7)
12892 return -1;
12893
12894 /* Sign, mantissa and exponent are now in the correct form to plug into the
12895 formula described in the comment above. */
12896 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12897 }
12898
12899 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12900 int
12901 vfp3_const_double_rtx (rtx x)
12902 {
12903 if (!TARGET_VFP3)
12904 return 0;
12905
12906 return vfp3_const_double_index (x) != -1;
12907 }
12908
12909 /* Recognize immediates which can be used in various Neon and MVE instructions.
12910 Legal immediates are described by the following table (for VMVN variants, the
12911 bitwise inverse of the constant shown is recognized. In either case, VMOV
12912 is output and the correct instruction to use for a given constant is chosen
12913 by the assembler). The constant shown is replicated across all elements of
12914 the destination vector.
12915
12916 insn elems variant constant (binary)
12917 ---- ----- ------- -----------------
12918 vmov i32 0 00000000 00000000 00000000 abcdefgh
12919 vmov i32 1 00000000 00000000 abcdefgh 00000000
12920 vmov i32 2 00000000 abcdefgh 00000000 00000000
12921 vmov i32 3 abcdefgh 00000000 00000000 00000000
12922 vmov i16 4 00000000 abcdefgh
12923 vmov i16 5 abcdefgh 00000000
12924 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12925 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12926 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12927 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12928 vmvn i16 10 00000000 abcdefgh
12929 vmvn i16 11 abcdefgh 00000000
12930 vmov i32 12 00000000 00000000 abcdefgh 11111111
12931 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12932 vmov i32 14 00000000 abcdefgh 11111111 11111111
12933 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12934 vmov i8 16 abcdefgh
12935 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12936 eeeeeeee ffffffff gggggggg hhhhhhhh
12937 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12938 vmov f32 19 00000000 00000000 00000000 00000000
12939
12940 For case 18, B = !b. Representable values are exactly those accepted by
12941 vfp3_const_double_index, but are output as floating-point numbers rather
12942 than indices.
12943
12944 For case 19, we will change it to vmov.i32 when assembling.
12945
12946 Variants 0-5 (inclusive) may also be used as immediates for the second
12947 operand of VORR/VBIC instructions.
12948
12949 The INVERSE argument causes the bitwise inverse of the given operand to be
12950 recognized instead (used for recognizing legal immediates for the VAND/VORN
12951 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12952 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12953 output, rather than the real insns vbic/vorr).
12954
12955 INVERSE makes no difference to the recognition of float vectors.
12956
12957 The return value is the variant of immediate as shown in the above table, or
12958 -1 if the given value doesn't match any of the listed patterns.
12959 */
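/* Two illustrative examples (assuming the little-endian layout handled
   below): a V4SImode constant with every element equal to 0x45 matches
   variant 0, returning *MODCONST == 0x45 and *ELEMENTWIDTH == 32, suitable
   for "vmov.i32 qd, #0x45"; a V16QImode constant with every byte equal to
   0xab matches variant 16 with *ELEMENTWIDTH == 8.  */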
12960 static int
12961 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12962 rtx *modconst, int *elementwidth)
12963 {
12964 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12965 matches = 1; \
12966 for (i = 0; i < idx; i += (STRIDE)) \
12967 if (!(TEST)) \
12968 matches = 0; \
12969 if (matches) \
12970 { \
12971 immtype = (CLASS); \
12972 elsize = (ELSIZE); \
12973 break; \
12974 }
12975
12976 unsigned int i, elsize = 0, idx = 0, n_elts;
12977 unsigned int innersize;
12978 unsigned char bytes[16] = {};
12979 int immtype = -1, matches;
12980 unsigned int invmask = inverse ? 0xff : 0;
12981 bool vector = GET_CODE (op) == CONST_VECTOR;
12982
12983 if (vector)
12984 n_elts = CONST_VECTOR_NUNITS (op);
12985 else
12986 {
12987 n_elts = 1;
12988 gcc_assert (mode != VOIDmode);
12989 }
12990
12991 innersize = GET_MODE_UNIT_SIZE (mode);
12992
12993 /* Only support 128-bit vectors for MVE. */
12994 if (TARGET_HAVE_MVE
12995 && (!vector
12996 || VALID_MVE_PRED_MODE (mode)
12997 || n_elts * innersize != 16))
12998 return -1;
12999
13000 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13001 return -1;
13002
13003 /* Vectors of float constants. */
13004 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13005 {
13006 rtx el0 = CONST_VECTOR_ELT (op, 0);
13007
13008 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13009 return -1;
13010
13011 /* FP16 vectors cannot be represented. */
13012 if (GET_MODE_INNER (mode) == HFmode)
13013 return -1;
13014
13015 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13016 are distinct in this context. */
13017 if (!const_vec_duplicate_p (op))
13018 return -1;
13019
13020 if (modconst)
13021 *modconst = CONST_VECTOR_ELT (op, 0);
13022
13023 if (elementwidth)
13024 *elementwidth = 0;
13025
13026 if (el0 == CONST0_RTX (GET_MODE (el0)))
13027 return 19;
13028 else
13029 return 18;
13030 }
13031
13032 /* The tricks done in the code below apply for little-endian vector layout.
13033 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13034 FIXME: Implement logic for big-endian vectors. */
13035 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13036 return -1;
13037
13038 /* Splat vector constant out into a byte vector. */
13039 for (i = 0; i < n_elts; i++)
13040 {
13041 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13042 unsigned HOST_WIDE_INT elpart;
13043
13044 gcc_assert (CONST_INT_P (el));
13045 elpart = INTVAL (el);
13046
13047 for (unsigned int byte = 0; byte < innersize; byte++)
13048 {
13049 bytes[idx++] = (elpart & 0xff) ^ invmask;
13050 elpart >>= BITS_PER_UNIT;
13051 }
13052 }
13053
13054 /* Sanity check. */
13055 gcc_assert (idx == GET_MODE_SIZE (mode));
13056
13057 do
13058 {
13059 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13061
13062 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13063 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13064
13065 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13067
13068 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13069 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13070
13071 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13072
13073 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13074
13075 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13076 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13077
13078 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13079 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13080
13081 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13082 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13083
13084 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13085 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13086
13087 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13088
13089 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13090
13091 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13092 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13093
13094 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13095 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13096
13097 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13098 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13099
13100 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13101 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13102
13103 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13104
13105 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13106 && bytes[i] == bytes[(i + 8) % idx]);
13107 }
13108 while (0);
13109
13110 if (immtype == -1)
13111 return -1;
13112
13113 if (elementwidth)
13114 *elementwidth = elsize;
13115
13116 if (modconst)
13117 {
13118 unsigned HOST_WIDE_INT imm = 0;
13119
13120 /* Un-invert bytes of recognized vector, if necessary. */
13121 if (invmask != 0)
13122 for (i = 0; i < idx; i++)
13123 bytes[i] ^= invmask;
13124
13125 if (immtype == 17)
13126 {
13127 /* FIXME: Broken on 32-bit H_W_I hosts. */
13128 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13129
13130 for (i = 0; i < 8; i++)
13131 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13132 << (i * BITS_PER_UNIT);
13133
13134 *modconst = GEN_INT (imm);
13135 }
13136 else
13137 {
13138 unsigned HOST_WIDE_INT imm = 0;
13139
13140 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13141 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13142
13143 *modconst = GEN_INT (imm);
13144 }
13145 }
13146
13147 return immtype;
13148 #undef CHECK
13149 }
13150
13151 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13152 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13153 (or zero for float elements), and a modified constant (whatever should be
13154 output for a VMOV) in *MODCONST. This function was renamed from
13155 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13156 it is used by both Neon and MVE. */
13157 int
13158 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13159 rtx *modconst, int *elementwidth)
13160 {
13161 rtx tmpconst;
13162 int tmpwidth;
13163 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13164
13165 if (retval == -1)
13166 return 0;
13167
13168 if (modconst)
13169 *modconst = tmpconst;
13170
13171 if (elementwidth)
13172 *elementwidth = tmpwidth;
13173
13174 return 1;
13175 }
13176
13177 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13178 the immediate is valid, write a constant suitable for using as an operand
13179 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13180 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13181
13182 int
13183 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13184 rtx *modconst, int *elementwidth)
13185 {
13186 rtx tmpconst;
13187 int tmpwidth;
13188 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13189
13190 if (retval < 0 || retval > 5)
13191 return 0;
13192
13193 if (modconst)
13194 *modconst = tmpconst;
13195
13196 if (elementwidth)
13197 *elementwidth = tmpwidth;
13198
13199 return 1;
13200 }
13201
13202 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13203 the immediate is valid, write a constant suitable for using as an operand
13204 to VSHR/VSHL to *MODCONST and the corresponding element width to
13205 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
13206 which have different limitations. */
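/* For example, with V8HImode (16-bit elements) a VSHL immediate must lie in
   the range 0-15, while a VSHR immediate must lie in the range 1-16.  */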
13207
13208 int
13209 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13210 rtx *modconst, int *elementwidth,
13211 bool isleftshift)
13212 {
13213 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13214 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13215 unsigned HOST_WIDE_INT last_elt = 0;
13216 unsigned HOST_WIDE_INT maxshift;
13217
13218 /* Split vector constant out into a byte vector. */
13219 for (i = 0; i < n_elts; i++)
13220 {
13221 rtx el = CONST_VECTOR_ELT (op, i);
13222 unsigned HOST_WIDE_INT elpart;
13223
13224 if (CONST_INT_P (el))
13225 elpart = INTVAL (el);
13226 else if (CONST_DOUBLE_P (el))
13227 return 0;
13228 else
13229 gcc_unreachable ();
13230
13231 if (i != 0 && elpart != last_elt)
13232 return 0;
13233
13234 last_elt = elpart;
13235 }
13236
13237 /* Shift less than element size. */
13238 maxshift = innersize * 8;
13239
13240 if (isleftshift)
13241 {
13242 /* Left shift immediate value can be from 0 to <size>-1. */
13243 if (last_elt >= maxshift)
13244 return 0;
13245 }
13246 else
13247 {
13248 /* Right shift immediate value can be from 1 to <size>. */
13249 if (last_elt == 0 || last_elt > maxshift)
13250 return 0;
13251 }
13252
13253 if (elementwidth)
13254 *elementwidth = innersize * 8;
13255
13256 if (modconst)
13257 *modconst = CONST_VECTOR_ELT (op, 0);
13258
13259 return 1;
13260 }
13261
13262 /* Return a string suitable for output of Neon immediate logic operation
13263 MNEM. */
13264
13265 char *
13266 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13267 int inverse, int quad)
13268 {
13269 int width, is_valid;
13270 static char templ[40];
13271
13272 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13273
13274 gcc_assert (is_valid != 0);
13275
13276 if (quad)
13277 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13278 else
13279 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13280
13281 return templ;
13282 }
13283
13284 /* Return a string suitable for output of Neon immediate shift operation
13285 (VSHR or VSHL) MNEM. */
13286
13287 char *
13288 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13289 machine_mode mode, int quad,
13290 bool isleftshift)
13291 {
13292 int width, is_valid;
13293 static char templ[40];
13294
13295 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13296 gcc_assert (is_valid != 0);
13297
13298 if (quad)
13299 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13300 else
13301 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13302
13303 return templ;
13304 }
13305
13306 /* Output a sequence of pairwise operations to implement a reduction.
13307 NOTE: We do "too much work" here, because pairwise operations work on two
13308 registers-worth of operands in one go. Unfortunately we can't exploit those
13309 extra calculations to do the full operation in fewer steps, I don't think.
13310 Although all vector elements of the result but the first are ignored, we
13311 actually calculate the same result in each of the elements. An alternative
13312 such as initially loading a vector with zero to use as each of the second
13313 operands would use up an additional register and take an extra instruction,
13314 for no particular gain. */
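/* As a sketch, a V4HImode reduction (PARTS == 4 below) emits two pairwise
   operations:

     i == 2:  tmp = REDUC (op1, op1)
     i == 1:  op0 = REDUC (tmp, tmp)

   after which every element of OP0 holds the reduced value, although only
   the first one is used.  */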
13315
13316 void
13317 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13318 rtx (*reduc) (rtx, rtx, rtx))
13319 {
13320 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13321 rtx tmpsum = op1;
13322
13323 for (i = parts / 2; i >= 1; i /= 2)
13324 {
13325 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13326 emit_insn (reduc (dest, tmpsum, tmpsum));
13327 tmpsum = dest;
13328 }
13329 }
13330
13331 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13332 loaded into a register using VDUP.
13333
13334 If this is the case, and GENERATE is set, we also generate
13335 instructions to do this and return an RTX to assign to the register. */
13336
13337 static rtx
13338 neon_vdup_constant (rtx vals, bool generate)
13339 {
13340 machine_mode mode = GET_MODE (vals);
13341 machine_mode inner_mode = GET_MODE_INNER (mode);
13342 rtx x;
13343
13344 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13345 return NULL_RTX;
13346
13347 if (!const_vec_duplicate_p (vals, &x))
13348 /* The elements are not all the same. We could handle repeating
13349 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13350 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13351 vdup.i16). */
13352 return NULL_RTX;
13353
13354 if (!generate)
13355 return x;
13356
13357 /* We can load this constant by using VDUP and a constant in a
13358 single ARM register. This will be cheaper than a vector
13359 load. */
13360
13361 x = copy_to_mode_reg (inner_mode, x);
13362 return gen_vec_duplicate (mode, x);
13363 }
13364
13365 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13366 rtx
13367 mve_bool_vec_to_const (rtx const_vec)
13368 {
13369 machine_mode mode = GET_MODE (const_vec);
13370
13371 if (!VECTOR_MODE_P (mode))
13372 return const_vec;
13373
13374 unsigned n_elts = GET_MODE_NUNITS (mode);
13375 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13376 unsigned shift_c = 16 / n_elts;
13377 unsigned i;
13378 int hi_val = 0;
13379
13380 for (i = 0; i < n_elts; i++)
13381 {
13382 rtx el = CONST_VECTOR_ELT (const_vec, i);
13383 unsigned HOST_WIDE_INT elpart;
13384
13385 gcc_assert (CONST_INT_P (el));
13386 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13387
13388 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13389
13390 hi_val |= elpart << (index * shift_c);
13391 }
13392 /* We are using a mov immediate to encode this constant, which writes 32 bits,
13393 so we need to make sure the top 16 bits are all 0; otherwise we can't
13394 guarantee we can actually write this immediate. */
13395 return gen_int_mode (hi_val, SImode);
13396 }
13397
13398 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13399 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13400 into a register.
13401
13402 If this is the case, and GENERATE is set, we also generate code to do
13403 this and return an RTX to copy into the register. */
13404
13405 rtx
13406 neon_make_constant (rtx vals, bool generate)
13407 {
13408 machine_mode mode = GET_MODE (vals);
13409 rtx target;
13410 rtx const_vec = NULL_RTX;
13411 int n_elts = GET_MODE_NUNITS (mode);
13412 int n_const = 0;
13413 int i;
13414
13415 if (GET_CODE (vals) == CONST_VECTOR)
13416 const_vec = vals;
13417 else if (GET_CODE (vals) == PARALLEL)
13418 {
13419 /* A CONST_VECTOR must contain only CONST_INTs and
13420 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13421 Only store valid constants in a CONST_VECTOR. */
13422 for (i = 0; i < n_elts; ++i)
13423 {
13424 rtx x = XVECEXP (vals, 0, i);
13425 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13426 n_const++;
13427 }
13428 if (n_const == n_elts)
13429 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13430 }
13431 else
13432 gcc_unreachable ();
13433
13434 if (const_vec != NULL
13435 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13436 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13437 return const_vec;
13438 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13439 return mve_bool_vec_to_const (const_vec);
13440 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13441 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13442 pipeline cycle; creating the constant takes one or two ARM
13443 pipeline cycles. */
13444 return target;
13445 else if (const_vec != NULL_RTX)
13446 /* Load from constant pool. On Cortex-A8 this takes two cycles
13447 (for either double or quad vectors). We cannot take advantage
13448 of single-cycle VLD1 because we need a PC-relative addressing
13449 mode. */
13450 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13451 else
13452 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13453 We cannot construct an initializer. */
13454 return NULL_RTX;
13455 }
13456
13457 /* Initialize vector TARGET to VALS. */
13458
13459 void
13460 neon_expand_vector_init (rtx target, rtx vals)
13461 {
13462 machine_mode mode = GET_MODE (target);
13463 machine_mode inner_mode = GET_MODE_INNER (mode);
13464 int n_elts = GET_MODE_NUNITS (mode);
13465 int n_var = 0, one_var = -1;
13466 bool all_same = true;
13467 rtx x, mem;
13468 int i;
13469
13470 for (i = 0; i < n_elts; ++i)
13471 {
13472 x = XVECEXP (vals, 0, i);
13473 if (!CONSTANT_P (x))
13474 ++n_var, one_var = i;
13475
13476 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13477 all_same = false;
13478 }
13479
13480 if (n_var == 0)
13481 {
13482 rtx constant = neon_make_constant (vals);
13483 if (constant != NULL_RTX)
13484 {
13485 emit_move_insn (target, constant);
13486 return;
13487 }
13488 }
13489
13490 /* Splat a single non-constant element if we can. */
13491 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13492 {
13493 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13494 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13495 return;
13496 }
13497
13498 /* One field is non-constant. Load constant then overwrite varying
13499 field. This is more efficient than using the stack. */
13500 if (n_var == 1)
13501 {
13502 rtx copy = copy_rtx (vals);
13503 rtx merge_mask = GEN_INT (1 << one_var);
13504
13505 /* Load constant part of vector, substitute neighboring value for
13506 varying element. */
13507 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13508 neon_expand_vector_init (target, copy);
13509
13510 /* Insert variable. */
13511 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13512 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13513 return;
13514 }
13515
13516 /* Construct the vector in memory one field at a time
13517 and load the whole vector. */
13518 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13519 for (i = 0; i < n_elts; i++)
13520 emit_move_insn (adjust_address_nv (mem, inner_mode,
13521 i * GET_MODE_SIZE (inner_mode)),
13522 XVECEXP (vals, 0, i));
13523 emit_move_insn (target, mem);
13524 }
13525
13526 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13527 an error, using DESC in the message, if it doesn't. EXP indicates the
13528 source location, which includes the inlining history for intrinsics. */
13529
13530 static void
13531 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13532 const_tree exp, const char *desc)
13533 {
13534 HOST_WIDE_INT lane;
13535
13536 gcc_assert (CONST_INT_P (operand));
13537
13538 lane = INTVAL (operand);
13539
13540 if (lane < low || lane >= high)
13541 {
13542 if (exp)
13543 error_at (EXPR_LOCATION (exp),
13544 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13545 else
13546 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13547 }
13548 }
13549
13550 /* Bounds-check lanes. */
13551
13552 void
13553 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13554 const_tree exp)
13555 {
13556 bounds_check (operand, low, high, exp, "lane");
13557 }
13558
13559 /* Bounds-check constants. */
13560
13561 void
13562 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13563 {
13564 bounds_check (operand, low, high, NULL_TREE, "constant");
13565 }
13566
13567 HOST_WIDE_INT
13568 neon_element_bits (machine_mode mode)
13569 {
13570 return GET_MODE_UNIT_BITSIZE (mode);
13571 }
13572
13573 \f
13574 /* Predicates for `match_operand' and `match_operator'. */
13575
13576 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13577 WB level is 2 if full writeback address modes are allowed, 1
13578 if limited writeback address modes (POST_INC and PRE_DEC) are
13579 allowed and 0 if no writeback at all is supported. */
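/* As a rough summary of the checks below: (mem (reg)) and
   (mem (plus (reg) (const_int))) are accepted at any WB level;
   (mem (post_inc (reg))) and (mem (pre_dec (reg))) need WB level 1 or 2;
   (mem (pre_inc (reg))), (mem (post_dec (reg))) and the
   {pre,post}_modify forms need WB level 2.  */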
13580
13581 int
13582 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13583 {
13584 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13585 rtx ind;
13586
13587 /* Reject eliminable registers. */
13588 if (! (reload_in_progress || reload_completed || lra_in_progress)
13589 && ( reg_mentioned_p (frame_pointer_rtx, op)
13590 || reg_mentioned_p (arg_pointer_rtx, op)
13591 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13592 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13593 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13594 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13595 return FALSE;
13596
13597 /* Constants are converted into offsets from labels. */
13598 if (!MEM_P (op))
13599 return FALSE;
13600
13601 ind = XEXP (op, 0);
13602
13603 if (reload_completed
13604 && (LABEL_REF_P (ind)
13605 || (GET_CODE (ind) == CONST
13606 && GET_CODE (XEXP (ind, 0)) == PLUS
13607 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13608 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13609 return TRUE;
13610
13611 /* Match: (mem (reg)). */
13612 if (REG_P (ind))
13613 return arm_address_register_rtx_p (ind, 0);
13614
13615 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13616 acceptable in any case (subject to verification by
13617 arm_address_register_rtx_p) and need at least restricted
13618 writeback. We need full writeback to accept PRE_INC and
13619 POST_DEC. */
13620 if (wb_level > 0
13621 && (GET_CODE (ind) == POST_INC
13622 || GET_CODE (ind) == PRE_DEC
13623 || (wb_level > 1
13624 && (GET_CODE (ind) == PRE_INC
13625 || GET_CODE (ind) == POST_DEC))))
13626 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13627
13628 if (wb_level > 1
13629 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13630 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13631 && GET_CODE (XEXP (ind, 1)) == PLUS
13632 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13633 ind = XEXP (ind, 1);
13634
13635 /* Match:
13636 (plus (reg)
13637 (const))
13638
13639 The encoded immediate for 16-bit modes is multiplied by 2,
13640 while the encoded immediate for 32-bit and 64-bit modes is
13641 multiplied by 4. */
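/* For example, with FACTOR == 4 (32-bit and 64-bit modes) the accepted
   offsets are the multiples of 4 in the range [-1020, 1020]; with
   FACTOR == 2 (16-bit modes) they are the even offsets in [-510, 510].  */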
13642 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13643 if (GET_CODE (ind) == PLUS
13644 && REG_P (XEXP (ind, 0))
13645 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13646 && CONST_INT_P (XEXP (ind, 1))
13647 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13648 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13649 return TRUE;
13650
13651 return FALSE;
13652 }
13653
13654 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13655 WB is true if full writeback address modes are allowed and is false
13656 if limited writeback address modes (POST_INC and PRE_DEC) are
13657 allowed. */
13658
13659 int arm_coproc_mem_operand (rtx op, bool wb)
13660 {
13661 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13662 }
13663
13664 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13665 context in which no writeback address modes are allowed. */
13666
13667 int
13668 arm_coproc_mem_operand_no_writeback (rtx op)
13669 {
13670 return arm_coproc_mem_operand_wb (op, 0);
13671 }
13672
13673 /* In non-STRICT mode, return the register number unchanged. In STRICT mode,
13674 return the hard regno a pseudo has been renumbered to, if any; otherwise
13675 return the original pseudo number. */
13676 static int
13677 arm_effective_regno (rtx op, bool strict)
13678 {
13679 gcc_assert (REG_P (op));
13680 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13681 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13682 return REGNO (op);
13683 return reg_renumber[REGNO (op)];
13684 }
13685
13686 /* This function returns TRUE on matching mode and op.
13687 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13688 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13689 int
13690 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13691 {
13692 enum rtx_code code;
13693 int val, reg_no;
13694
13695 /* Match: (mem (reg)). */
13696 if (REG_P (op))
13697 {
13698 reg_no = arm_effective_regno (op, strict);
13699 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13700 ? reg_no <= LAST_LO_REGNUM
13701 : reg_no < LAST_ARM_REGNUM)
13702 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13703 }
13704 code = GET_CODE (op);
13705
13706 if ((code == POST_INC
13707 || code == PRE_DEC
13708 || code == PRE_INC
13709 || code == POST_DEC)
13710 && REG_P (XEXP (op, 0)))
13711 {
13712 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13713 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13714 ? reg_no <= LAST_LO_REGNUM
13715 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13716 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13717 }
13718 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13719 && GET_CODE (XEXP (op, 1)) == PLUS
13720 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13721 && REG_P (XEXP (op, 0))
13722 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13723 /* Make sure to only accept PLUS after reload_completed, otherwise
13724 this will interfere with auto_inc's pattern detection. */
13725 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13726 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13727 {
13728 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13729 if (code == PLUS)
13730 val = INTVAL (XEXP (op, 1));
13731 else
13732 val = INTVAL (XEXP(XEXP (op, 1), 1));
13733
13734 switch (mode)
13735 {
13736 case E_V16QImode:
13737 case E_V8QImode:
13738 case E_V4QImode:
13739 if (abs (val) > 127)
13740 return FALSE;
13741 break;
13742 case E_V8HImode:
13743 case E_V8HFmode:
13744 case E_V4HImode:
13745 case E_V4HFmode:
13746 if (val % 2 != 0 || abs (val) > 254)
13747 return FALSE;
13748 break;
13749 case E_V4SImode:
13750 case E_V4SFmode:
13751 if (val % 4 != 0 || abs (val) > 508)
13752 return FALSE;
13753 break;
13754 default:
13755 return FALSE;
13756 }
13757 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13758 || (MVE_STN_LDW_MODE (mode)
13759 ? reg_no <= LAST_LO_REGNUM
13760 : (reg_no < LAST_ARM_REGNUM
13761 && (code == PLUS || reg_no != SP_REGNUM))));
13762 }
13763 return FALSE;
13764 }
13765
13766 /* Return TRUE if OP is a memory operand which we can load or store a vector
13767 to/from. TYPE is one of the following values:
13768 0 - Vector load/store (vldr)
13769 1 - Core registers (ldm)
13770 2 - Element/structure loads (vld1)
13771 */
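/* A rough summary of the forms accepted below: (mem (reg)) is valid for all
   three TYPEs; POST_INC is valid for TYPEs 0 and 2 and PRE_DEC for TYPE 0
   only; post-increment by a register (POST_MODIFY) is valid only for TYPE 2;
   and a constant offset from a register is valid only for TYPE 0, within
   the VLDR offset range checked below.  */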
13772 int
13773 neon_vector_mem_operand (rtx op, int type, bool strict)
13774 {
13775 rtx ind;
13776
13777 /* Reject eliminable registers. */
13778 if (strict && ! (reload_in_progress || reload_completed)
13779 && (reg_mentioned_p (frame_pointer_rtx, op)
13780 || reg_mentioned_p (arg_pointer_rtx, op)
13781 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13782 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13783 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13784 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13785 return FALSE;
13786
13787 /* Constants are converted into offsets from labels. */
13788 if (!MEM_P (op))
13789 return FALSE;
13790
13791 ind = XEXP (op, 0);
13792
13793 if (reload_completed
13794 && (LABEL_REF_P (ind)
13795 || (GET_CODE (ind) == CONST
13796 && GET_CODE (XEXP (ind, 0)) == PLUS
13797 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13798 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13799 return TRUE;
13800
13801 /* Match: (mem (reg)). */
13802 if (REG_P (ind))
13803 return arm_address_register_rtx_p (ind, 0);
13804
13805 /* Allow post-increment with Neon registers. */
13806 if ((type != 1 && GET_CODE (ind) == POST_INC)
13807 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13808 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13809
13810 /* Allow post-increment by register for VLDn. */
13811 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13812 && GET_CODE (XEXP (ind, 1)) == PLUS
13813 && REG_P (XEXP (XEXP (ind, 1), 1))
13814 && REG_P (XEXP (ind, 0))
13815 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13816 return true;
13817
13818 /* Match:
13819 (plus (reg)
13820 (const)). */
13821 if (type == 0
13822 && GET_CODE (ind) == PLUS
13823 && REG_P (XEXP (ind, 0))
13824 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13825 && CONST_INT_P (XEXP (ind, 1))
13826 && INTVAL (XEXP (ind, 1)) > -1024
13827 /* For quad modes, we restrict the constant offset to be slightly less
13828 than what the instruction format permits. We have no such constraint
13829 on double mode offsets. (This must match arm_legitimate_index_p.) */
13830 && (INTVAL (XEXP (ind, 1))
13831 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13832 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13833 return TRUE;
13834
13835 return FALSE;
13836 }
13837
13838 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13839 type. */
13840 int
13841 mve_struct_mem_operand (rtx op)
13842 {
13843 rtx ind = XEXP (op, 0);
13844
13845 /* Match: (mem (reg)). */
13846 if (REG_P (ind))
13847 return arm_address_register_rtx_p (ind, 0);
13848
13849 /* Allow only post-increment by the mode size. */
13850 if (GET_CODE (ind) == POST_INC)
13851 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13852
13853 return FALSE;
13854 }
13855
13856 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13857 type. */
13858 int
13859 neon_struct_mem_operand (rtx op)
13860 {
13861 rtx ind;
13862
13863 /* Reject eliminable registers. */
13864 if (! (reload_in_progress || reload_completed)
13865 && ( reg_mentioned_p (frame_pointer_rtx, op)
13866 || reg_mentioned_p (arg_pointer_rtx, op)
13867 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13868 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13869 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13870 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13871 return FALSE;
13872
13873 /* Constants are converted into offsets from labels. */
13874 if (!MEM_P (op))
13875 return FALSE;
13876
13877 ind = XEXP (op, 0);
13878
13879 if (reload_completed
13880 && (LABEL_REF_P (ind)
13881 || (GET_CODE (ind) == CONST
13882 && GET_CODE (XEXP (ind, 0)) == PLUS
13883 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13884 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13885 return TRUE;
13886
13887 /* Match: (mem (reg)). */
13888 if (REG_P (ind))
13889 return arm_address_register_rtx_p (ind, 0);
13890
13891 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13892 if (GET_CODE (ind) == POST_INC
13893 || GET_CODE (ind) == PRE_DEC)
13894 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13895
13896 return FALSE;
13897 }
13898
13899 /* Prepares the operands for the VCMLA by lane instruction such that the right
13900 register number is selected. This instruction is special in that it always
13901 requires a D register; however, there is a choice to be made between Dn[0],
13902 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13903
13904 The VCMLA by lane function always selects two values. For instance given D0
13905 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13906 used by the instruction. However given V4SF then index 0 and 1 are valid as
13907 D0[0] or D1[0] are both valid.
13908
13909 This function centralizes that information based on OPERANDS: OPERANDS[3]
13910 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13911 updated to contain the right index. */
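/* For example (an illustration of the logic below): for a V4SF lane operand
   held in q0 (d0/d1), lane 0 selects d0 with index 0, while lane 1 is
   rewritten to d1 with index 0.  */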
13912
13913 rtx *
13914 neon_vcmla_lane_prepare_operands (rtx *operands)
13915 {
13916 int lane = INTVAL (operands[4]);
13917 machine_mode constmode = SImode;
13918 machine_mode mode = GET_MODE (operands[3]);
13919 int regno = REGNO (operands[3]);
13920 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13921 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13922 {
13923 operands[3] = gen_int_mode (regno + 1, constmode);
13924 operands[4]
13925 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13926 }
13927 else
13928 {
13929 operands[3] = gen_int_mode (regno, constmode);
13930 operands[4] = gen_int_mode (lane, constmode);
13931 }
13932 return operands;
13933 }
13934
13935
13936 /* Return true if X is a register that will be eliminated later on. */
13937 int
13938 arm_eliminable_register (rtx x)
13939 {
13940 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13941 || REGNO (x) == ARG_POINTER_REGNUM
13942 || VIRTUAL_REGISTER_P (x));
13943 }
13944
13945 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13946 coprocessor registers. Otherwise return NO_REGS. */
13947
13948 enum reg_class
13949 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13950 {
13951 if (mode == HFmode)
13952 {
13953 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13954 return GENERAL_REGS;
13955 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13956 return NO_REGS;
13957 return GENERAL_REGS;
13958 }
13959
13960 /* The neon move patterns handle all legitimate vector and struct
13961 addresses. */
13962 if (TARGET_NEON
13963 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13964 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13965 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13966 || VALID_NEON_STRUCT_MODE (mode)))
13967 return NO_REGS;
13968
13969 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13970 return NO_REGS;
13971
13972 return GENERAL_REGS;
13973 }
13974
13975 /* Values which must be returned in the most-significant end of the return
13976 register. */
13977
13978 static bool
13979 arm_return_in_msb (const_tree valtype)
13980 {
13981 return (TARGET_AAPCS_BASED
13982 && BYTES_BIG_ENDIAN
13983 && (AGGREGATE_TYPE_P (valtype)
13984 || TREE_CODE (valtype) == COMPLEX_TYPE
13985 || FIXED_POINT_TYPE_P (valtype)));
13986 }
13987
13988 /* Return TRUE if X references a SYMBOL_REF. */
13989 int
13990 symbol_mentioned_p (rtx x)
13991 {
13992 const char * fmt;
13993 int i;
13994
13995 if (SYMBOL_REF_P (x))
13996 return 1;
13997
13998 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13999 are constant offsets, not symbols. */
14000 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14001 return 0;
14002
14003 fmt = GET_RTX_FORMAT (GET_CODE (x));
14004
14005 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14006 {
14007 if (fmt[i] == 'E')
14008 {
14009 int j;
14010
14011 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14012 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14013 return 1;
14014 }
14015 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14016 return 1;
14017 }
14018
14019 return 0;
14020 }
14021
14022 /* Return TRUE if X references a LABEL_REF. */
14023 int
14024 label_mentioned_p (rtx x)
14025 {
14026 const char * fmt;
14027 int i;
14028
14029 if (LABEL_REF_P (x))
14030 return 1;
14031
14032 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14033 instruction, but they are constant offsets, not symbols. */
14034 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14035 return 0;
14036
14037 fmt = GET_RTX_FORMAT (GET_CODE (x));
14038 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14039 {
14040 if (fmt[i] == 'E')
14041 {
14042 int j;
14043
14044 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14045 if (label_mentioned_p (XVECEXP (x, i, j)))
14046 return 1;
14047 }
14048 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14049 return 1;
14050 }
14051
14052 return 0;
14053 }
14054
14055 int
14056 tls_mentioned_p (rtx x)
14057 {
14058 switch (GET_CODE (x))
14059 {
14060 case CONST:
14061 return tls_mentioned_p (XEXP (x, 0));
14062
14063 case UNSPEC:
14064 if (XINT (x, 1) == UNSPEC_TLS)
14065 return 1;
14066
14067 /* Fall through. */
14068 default:
14069 return 0;
14070 }
14071 }
14072
14073 /* Must not copy any rtx that uses a pc-relative address.
14074 Also, disallow copying of load-exclusive instructions that
14075 may appear after splitting of compare-and-swap-style operations
14076 so as to prevent those loops from being transformed away from their
14077 canonical forms (see PR 69904). */
14078
14079 static bool
14080 arm_cannot_copy_insn_p (rtx_insn *insn)
14081 {
14082 /* The tls call insn cannot be copied, as it is paired with a data
14083 word. */
14084 if (recog_memoized (insn) == CODE_FOR_tlscall)
14085 return true;
14086
14087 subrtx_iterator::array_type array;
14088 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14089 {
14090 const_rtx x = *iter;
14091 if (GET_CODE (x) == UNSPEC
14092 && (XINT (x, 1) == UNSPEC_PIC_BASE
14093 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14094 return true;
14095 }
14096
14097 rtx set = single_set (insn);
14098 if (set)
14099 {
14100 rtx src = SET_SRC (set);
14101 if (GET_CODE (src) == ZERO_EXTEND)
14102 src = XEXP (src, 0);
14103
14104 /* Catch the load-exclusive and load-acquire operations. */
14105 if (GET_CODE (src) == UNSPEC_VOLATILE
14106 && (XINT (src, 1) == VUNSPEC_LL
14107 || XINT (src, 1) == VUNSPEC_LAX))
14108 return true;
14109 }
14110 return false;
14111 }
14112
14113 enum rtx_code
14114 minmax_code (rtx x)
14115 {
14116 enum rtx_code code = GET_CODE (x);
14117
14118 switch (code)
14119 {
14120 case SMAX:
14121 return GE;
14122 case SMIN:
14123 return LE;
14124 case UMIN:
14125 return LEU;
14126 case UMAX:
14127 return GEU;
14128 default:
14129 gcc_unreachable ();
14130 }
14131 }
14132
14133 /* Match pair of min/max operators that can be implemented via usat/ssat. */
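/* For example, the bounds [0, 255] describe an 8-bit USAT (*MASK == 8,
   *SIGNED_SAT == false), while the bounds [-128, 127] describe an 8-bit
   SSAT (*MASK == 8, *SIGNED_SAT == true).  */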
14134
14135 bool
14136 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14137 int *mask, bool *signed_sat)
14138 {
14139 /* The high bound must be a power of two minus one. */
14140 int log = exact_log2 (INTVAL (hi_bound) + 1);
14141 if (log == -1)
14142 return false;
14143
14144 /* The low bound is either zero (for usat) or one less than the
14145 negation of the high bound (for ssat). */
14146 if (INTVAL (lo_bound) == 0)
14147 {
14148 if (mask)
14149 *mask = log;
14150 if (signed_sat)
14151 *signed_sat = false;
14152
14153 return true;
14154 }
14155
14156 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14157 {
14158 if (mask)
14159 *mask = log + 1;
14160 if (signed_sat)
14161 *signed_sat = true;
14162
14163 return true;
14164 }
14165
14166 return false;
14167 }
14168
14169 /* Return 1 if memory locations are adjacent. */
14170 int
14171 adjacent_mem_locations (rtx a, rtx b)
14172 {
14173 /* We don't guarantee to preserve the order of these memory refs. */
14174 if (volatile_refs_p (a) || volatile_refs_p (b))
14175 return 0;
14176
14177 if ((REG_P (XEXP (a, 0))
14178 || (GET_CODE (XEXP (a, 0)) == PLUS
14179 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14180 && (REG_P (XEXP (b, 0))
14181 || (GET_CODE (XEXP (b, 0)) == PLUS
14182 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14183 {
14184 HOST_WIDE_INT val0 = 0, val1 = 0;
14185 rtx reg0, reg1;
14186 int val_diff;
14187
14188 if (GET_CODE (XEXP (a, 0)) == PLUS)
14189 {
14190 reg0 = XEXP (XEXP (a, 0), 0);
14191 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14192 }
14193 else
14194 reg0 = XEXP (a, 0);
14195
14196 if (GET_CODE (XEXP (b, 0)) == PLUS)
14197 {
14198 reg1 = XEXP (XEXP (b, 0), 0);
14199 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14200 }
14201 else
14202 reg1 = XEXP (b, 0);
14203
14204 /* Don't accept any offset that will require multiple
14205 instructions to handle, since this would cause the
14206 arith_adjacentmem pattern to output an overlong sequence. */
14207 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14208 return 0;
14209
14210 /* Don't allow an eliminable register: register elimination can make
14211 the offset too large. */
14212 if (arm_eliminable_register (reg0))
14213 return 0;
14214
14215 val_diff = val1 - val0;
14216
14217 if (arm_ld_sched)
14218 {
14219 /* If the target has load delay slots, then there's no benefit
14220 to using an ldm instruction unless the offset is zero and
14221 we are optimizing for size. */
14222 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14223 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14224 && (val_diff == 4 || val_diff == -4));
14225 }
14226
14227 return ((REGNO (reg0) == REGNO (reg1))
14228 && (val_diff == 4 || val_diff == -4));
14229 }
14230
14231 return 0;
14232 }
14233
14234 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14235 for load operations, false for store operations. CONSECUTIVE is true
14236 if the register numbers in the operation must be consecutive in the register
14237 bank. RETURN_PC is true if the value is to be loaded into the PC.
14238 The pattern we are trying to match for load is:
14239 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14240 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14241 :
14242 :
14243 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14244 ]
14245 where
14246 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14247 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14248 3. If consecutive is TRUE, then for kth register being loaded,
14249 REGNO (R_dk) = REGNO (R_d0) + k.
14250 The pattern for store is similar. */
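/* As a concrete (illustrative) instance, a pop-style load multiple such as
   ldmia sp!, {r4, r5, r6} is represented roughly as:
     [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
      (set (reg:SI r4) (mem:SI (reg:SI sp)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
      (set (reg:SI r6) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))]
   i.e. the optional write-back SET comes first and the loaded registers
   appear in ascending order at offsets 0, 4 and 8.  */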
14251 bool
14252 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14253 bool consecutive, bool return_pc)
14254 {
14255 HOST_WIDE_INT count = XVECLEN (op, 0);
14256 rtx reg, mem, addr;
14257 unsigned regno;
14258 unsigned first_regno;
14259 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14260 rtx elt;
14261 bool addr_reg_in_reglist = false;
14262 bool update = false;
14263 int reg_increment;
14264 int offset_adj;
14265 int regs_per_val;
14266
14267 /* If not in SImode, then registers must be consecutive
14268 (e.g., VLDM instructions for DFmode). */
14269 gcc_assert ((mode == SImode) || consecutive);
14270 /* Setting return_pc for stores is illegal. */
14271 gcc_assert (!return_pc || load);
14272
14273 /* Set up the increments and the regs per val based on the mode. */
14274 reg_increment = GET_MODE_SIZE (mode);
14275 regs_per_val = reg_increment / 4;
14276 offset_adj = return_pc ? 1 : 0;
14277
14278 if (count <= 1
14279 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14280 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14281 return false;
14282
14283 /* Check if this is a write-back. */
14284 elt = XVECEXP (op, 0, offset_adj);
14285 if (GET_CODE (SET_SRC (elt)) == PLUS)
14286 {
14287 i++;
14288 base = 1;
14289 update = true;
14290
14291 /* The offset adjustment must be the number of registers being
14292 popped times the size of a single register. */
14293 if (!REG_P (SET_DEST (elt))
14294 || !REG_P (XEXP (SET_SRC (elt), 0))
14295 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14296 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14297 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14298 ((count - 1 - offset_adj) * reg_increment))
14299 return false;
14300 }
14301
14302 i = i + offset_adj;
14303 base = base + offset_adj;
14304 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14305 success depends on the type: VLDM can do just one reg,
14306 LDM must do at least two. */
14307 if ((count <= i) && (mode == SImode))
14308 return false;
14309
14310 elt = XVECEXP (op, 0, i - 1);
14311 if (GET_CODE (elt) != SET)
14312 return false;
14313
14314 if (load)
14315 {
14316 reg = SET_DEST (elt);
14317 mem = SET_SRC (elt);
14318 }
14319 else
14320 {
14321 reg = SET_SRC (elt);
14322 mem = SET_DEST (elt);
14323 }
14324
14325 if (!REG_P (reg) || !MEM_P (mem))
14326 return false;
14327
14328 regno = REGNO (reg);
14329 first_regno = regno;
14330 addr = XEXP (mem, 0);
14331 if (GET_CODE (addr) == PLUS)
14332 {
14333 if (!CONST_INT_P (XEXP (addr, 1)))
14334 return false;
14335
14336 offset = INTVAL (XEXP (addr, 1));
14337 addr = XEXP (addr, 0);
14338 }
14339
14340 if (!REG_P (addr))
14341 return false;
14342
14343 /* Don't allow SP to be loaded unless it is also the base register. It
14344 guarantees that SP is reset correctly when an LDM instruction
14345 is interrupted. Otherwise, we might end up with a corrupt stack. */
14346 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14347 return false;
14348
14349 if (regno == REGNO (addr))
14350 addr_reg_in_reglist = true;
14351
14352 for (; i < count; i++)
14353 {
14354 elt = XVECEXP (op, 0, i);
14355 if (GET_CODE (elt) != SET)
14356 return false;
14357
14358 if (load)
14359 {
14360 reg = SET_DEST (elt);
14361 mem = SET_SRC (elt);
14362 }
14363 else
14364 {
14365 reg = SET_SRC (elt);
14366 mem = SET_DEST (elt);
14367 }
14368
14369 if (!REG_P (reg)
14370 || GET_MODE (reg) != mode
14371 || REGNO (reg) <= regno
14372 || (consecutive
14373 && (REGNO (reg) !=
14374 (unsigned int) (first_regno + regs_per_val * (i - base))))
14375 /* Don't allow SP to be loaded unless it is also the base register. It
14376 guarantees that SP is reset correctly when an LDM instruction
14377 is interrupted. Otherwise, we might end up with a corrupt stack. */
14378 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14379 || !MEM_P (mem)
14380 || GET_MODE (mem) != mode
14381 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14382 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14383 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14384 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14385 offset + (i - base) * reg_increment))
14386 && (!REG_P (XEXP (mem, 0))
14387 || offset + (i - base) * reg_increment != 0)))
14388 return false;
14389
14390 regno = REGNO (reg);
14391 if (regno == REGNO (addr))
14392 addr_reg_in_reglist = true;
14393 }
14394
14395 if (load)
14396 {
14397 if (update && addr_reg_in_reglist)
14398 return false;
14399
14400 /* For Thumb-1, the address register is always modified - either by write-back
14401 or by explicit load. If the pattern does not describe an update,
14402 then the address register must be in the list of loaded registers. */
14403 if (TARGET_THUMB1)
14404 return update || addr_reg_in_reglist;
14405 }
14406
14407 return true;
14408 }
14409
14410 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14411 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14412 following form:
14413
14414 [(set (reg:SI <N>) (const_int 0))
14415 (set (reg:SI <M>) (const_int 0))
14416 ...
14417 (unspec_volatile [(const_int 0)]
14418 VUNSPEC_CLRM_APSR)
14419 (clobber (reg:CC CC_REGNUM))
14420 ]
14421
14422 Any number (including 0) of set expressions is valid, and the volatile unspec is
14423 optional. All registers but SP and PC are allowed and registers must be in
14424 strict increasing order.
14425
14426 To be a valid VSCCLRM pattern, OP must have the following form:
14427
14428 [(unspec_volatile [(const_int 0)]
14429 VUNSPEC_VSCCLRM_VPR)
14430 (set (reg:SF <N>) (const_int 0))
14431 (set (reg:SF <M>) (const_int 0))
14432 ...
14433 ]
14434
14435 As with CLRM, any number (including 0) of set expressions is valid; however,
14436 the volatile unspec is mandatory here. Any VFP single-precision register is
14437 accepted but all registers must be consecutive and in increasing order. */
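/* For instance (illustrative), a CLRM clearing r1, r3 and APSR would be
   matched from a parallel of the form:
     [(set (reg:SI 1) (const_int 0))
      (set (reg:SI 3) (const_int 0))
      (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
      (clobber (reg:CC CC_REGNUM))]
   with the register numbers strictly increasing and neither SP nor PC
   present.  */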
14438
14439 bool
14440 clear_operation_p (rtx op, bool vfp)
14441 {
14442 unsigned regno;
14443 unsigned last_regno = INVALID_REGNUM;
14444 rtx elt, reg, zero;
14445 int count = XVECLEN (op, 0);
14446 int first_set = vfp ? 1 : 0;
14447 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14448
14449 for (int i = first_set; i < count; i++)
14450 {
14451 elt = XVECEXP (op, 0, i);
14452
14453 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14454 {
14455 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14456 || XVECLEN (elt, 0) != 1
14457 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14458 || i != count - 2)
14459 return false;
14460
14461 continue;
14462 }
14463
14464 if (GET_CODE (elt) == CLOBBER)
14465 continue;
14466
14467 if (GET_CODE (elt) != SET)
14468 return false;
14469
14470 reg = SET_DEST (elt);
14471 zero = SET_SRC (elt);
14472
14473 if (!REG_P (reg)
14474 || GET_MODE (reg) != expected_mode
14475 || zero != CONST0_RTX (SImode))
14476 return false;
14477
14478 regno = REGNO (reg);
14479
14480 if (vfp)
14481 {
14482 if (i != first_set && regno != last_regno + 1)
14483 return false;
14484 }
14485 else
14486 {
14487 if (regno == SP_REGNUM || regno == PC_REGNUM)
14488 return false;
14489 if (i != first_set && regno <= last_regno)
14490 return false;
14491 }
14492
14493 last_regno = regno;
14494 }
14495
14496 return true;
14497 }
14498
14499 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14500 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14501 instruction. ADD_OFFSET is nonzero if the base address register needs
14502 to be modified with an add instruction before we can use it. */
14503
14504 static bool
14505 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14506 int nops, HOST_WIDE_INT add_offset)
14507 {
14508 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14509 if the offset isn't small enough. The reason 2 ldrs are faster
14510 is because these ARMs are able to do more than one cache access
14511 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14512 whilst the ARM8 has a double bandwidth cache. This means that
14513 these cores can do both an instruction fetch and a data fetch in
14514 a single cycle, so the trick of calculating the address into a
14515 scratch register (one of the result regs) and then doing a load
14516 multiple actually becomes slower (and no smaller in code size).
14517 That is the transformation
14518
14519 ldr rd1, [rbase + offset]
14520 ldr rd2, [rbase + offset + 4]
14521
14522 to
14523
14524 add rd1, rbase, offset
14525 ldmia rd1, {rd1, rd2}
14526
14527 produces worse code -- '3 cycles + any stalls on rd2' instead of
14528 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14529 access per cycle, the first sequence could never complete in less
14530 than 6 cycles, whereas the ldm sequence would only take 5 and
14531 would make better use of sequential accesses if not hitting the
14532 cache.
14533
14534 We cheat here and test 'arm_ld_sched' which we currently know to
14535 only be true for the ARM8, ARM9 and StrongARM. If this ever
14536 changes, then the test below needs to be reworked. */
14537 if (nops == 2 && arm_ld_sched && add_offset != 0)
14538 return false;
14539
14540 /* XScale has load-store double instructions, but they have stricter
14541 alignment requirements than load-store multiple, so we cannot
14542 use them.
14543
14544 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14545 the pipeline until completion.
14546
14547 NREGS CYCLES
14548 1 3
14549 2 4
14550 3 5
14551 4 6
14552
14553 An ldr instruction takes 1-3 cycles, but does not block the
14554 pipeline.
14555
14556 NREGS CYCLES
14557 1 1-3
14558 2 2-6
14559 3 3-9
14560 4 4-12
14561
14562 Best case ldr will always win. However, the more ldr instructions
14563 we issue, the less likely we are to be able to schedule them well.
14564 Using ldr instructions also increases code size.
14565
14566 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14567 for counts of 3 or 4 regs. */
14568 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14569 return false;
14570 return true;
14571 }
14572
14573 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14574 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14575 an array ORDER which describes the sequence to use when accessing the
14576 offsets that produces an ascending order. In this sequence, each
14577 offset must be larger by exactly 4 than the previous one. ORDER[0]
14578 must have been filled in with the lowest offset by the caller.
14579 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14580 we use to verify that ORDER produces an ascending order of registers.
14581 Return true if it was possible to construct such an order, false if
14582 not. */
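/* As a small worked example (not from the code below): with
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of offset 0),
   the loop fills ORDER = {1, 2, 0, 3}, visiting the offsets as 0, 4, 8, 12.
   If any step cannot find exactly one offset that is larger than the
   previous one by 4, the function fails.  */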
14583
14584 static bool
14585 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14586 int *unsorted_regs)
14587 {
14588 int i;
14589 for (i = 1; i < nops; i++)
14590 {
14591 int j;
14592
14593 order[i] = order[i - 1];
14594 for (j = 0; j < nops; j++)
14595 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14596 {
14597 /* We must find exactly one offset that is higher than the
14598 previous one by 4. */
14599 if (order[i] != order[i - 1])
14600 return false;
14601 order[i] = j;
14602 }
14603 if (order[i] == order[i - 1])
14604 return false;
14605 /* The register numbers must be ascending. */
14606 if (unsorted_regs != NULL
14607 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14608 return false;
14609 }
14610 return true;
14611 }
14612
14613 /* Used to determine in a peephole whether a sequence of load
14614 instructions can be changed into a load-multiple instruction.
14615 NOPS is the number of separate load instructions we are examining. The
14616 first NOPS entries in OPERANDS are the destination registers, the
14617 next NOPS entries are memory operands. If this function is
14618 successful, *BASE is set to the common base register of the memory
14619 accesses; *LOAD_OFFSET is set to the first memory location's offset
14620 from that base register.
14621 REGS is an array filled in with the destination register numbers.
14622 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14623 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14624 the sequence of registers in REGS matches the loads from ascending memory
14625 locations, and the function verifies that the register numbers are
14626 themselves ascending. If CHECK_REGS is false, the register numbers
14627 are stored in the order they are found in the operands. */
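/* The returned value selects the addressing mode; roughly (illustrative,
   some forms are ARM-only): a lowest offset of 0 maps to ldmia, 4 to ldmib,
   a highest offset of 0 to ldmda and -4 to ldmdb.  For example, offsets
   {0, 4, 8} from r3 can become ldmia r3, {...} while {-12, -8, -4} can
   become ldmdb r3, {...}.  Case 5 means the base must first be adjusted
   with an add.  */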
14628 static int
14629 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14630 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14631 {
14632 int unsorted_regs[MAX_LDM_STM_OPS];
14633 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14634 int order[MAX_LDM_STM_OPS];
14635 int base_reg = -1;
14636 int i, ldm_case;
14637
14638 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14639 easily extended if required. */
14640 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14641
14642 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14643
14644 /* Loop over the operands and check that the memory references are
14645 suitable (i.e. immediate offsets from the same base register). At
14646 the same time, extract the target register, and the memory
14647 offsets. */
14648 for (i = 0; i < nops; i++)
14649 {
14650 rtx reg;
14651 rtx offset;
14652
14653 /* Convert a subreg of a mem into the mem itself. */
14654 if (GET_CODE (operands[nops + i]) == SUBREG)
14655 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14656
14657 gcc_assert (MEM_P (operands[nops + i]));
14658
14659 /* Don't reorder volatile memory references; it doesn't seem worth
14660 looking for the case where the order is ok anyway. */
14661 if (MEM_VOLATILE_P (operands[nops + i]))
14662 return 0;
14663
14664 offset = const0_rtx;
14665
14666 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14667 || (SUBREG_P (reg)
14668 && REG_P (reg = SUBREG_REG (reg))))
14669 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14670 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14671 || (SUBREG_P (reg)
14672 && REG_P (reg = SUBREG_REG (reg))))
14673 && (CONST_INT_P (offset
14674 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14675 {
14676 if (i == 0)
14677 {
14678 base_reg = REGNO (reg);
14679 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14680 return 0;
14681 }
14682 else if (base_reg != (int) REGNO (reg))
14683 /* Not addressed from the same base register. */
14684 return 0;
14685
14686 unsorted_regs[i] = (REG_P (operands[i])
14687 ? REGNO (operands[i])
14688 : REGNO (SUBREG_REG (operands[i])));
14689
14690 /* If it isn't an integer register, or if it overwrites the
14691 base register but isn't the last insn in the list, then
14692 we can't do this. */
14693 if (unsorted_regs[i] < 0
14694 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14695 || unsorted_regs[i] > 14
14696 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14697 return 0;
14698
14699 /* Don't allow SP to be loaded unless it is also the base
14700 register. It guarantees that SP is reset correctly when
14701 an LDM instruction is interrupted. Otherwise, we might
14702 end up with a corrupt stack. */
14703 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14704 return 0;
14705
14706 unsorted_offsets[i] = INTVAL (offset);
14707 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14708 order[0] = i;
14709 }
14710 else
14711 /* Not a suitable memory address. */
14712 return 0;
14713 }
14714
14715 /* All the useful information has now been extracted from the
14716 operands into unsorted_regs and unsorted_offsets; additionally,
14717 order[0] has been set to the lowest offset in the list. Sort
14718 the offsets into order, verifying that they are adjacent, and
14719 check that the register numbers are ascending. */
14720 if (!compute_offset_order (nops, unsorted_offsets, order,
14721 check_regs ? unsorted_regs : NULL))
14722 return 0;
14723
14724 if (saved_order)
14725 memcpy (saved_order, order, sizeof order);
14726
14727 if (base)
14728 {
14729 *base = base_reg;
14730
14731 for (i = 0; i < nops; i++)
14732 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14733
14734 *load_offset = unsorted_offsets[order[0]];
14735 }
14736
14737 if (unsorted_offsets[order[0]] == 0)
14738 ldm_case = 1; /* ldmia */
14739 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14740 ldm_case = 2; /* ldmib */
14741 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14742 ldm_case = 3; /* ldmda */
14743 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14744 ldm_case = 4; /* ldmdb */
14745 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14746 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14747 ldm_case = 5;
14748 else
14749 return 0;
14750
14751 if (!multiple_operation_profitable_p (false, nops,
14752 ldm_case == 5
14753 ? unsorted_offsets[order[0]] : 0))
14754 return 0;
14755
14756 return ldm_case;
14757 }
14758
14759 /* Used to determine in a peephole whether a sequence of store instructions can
14760 be changed into a store-multiple instruction.
14761 NOPS is the number of separate store instructions we are examining.
14762 NOPS_TOTAL is the total number of instructions recognized by the peephole
14763 pattern.
14764 The first NOPS entries in OPERANDS are the source registers, the next
14765 NOPS entries are memory operands. If this function is successful, *BASE is
14766 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14767 to the first memory location's offset from that base register. REGS is an
14768 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14769 likewise filled with the corresponding rtx's.
14770 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14771 numbers to an ascending order of stores.
14772 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14773 from ascending memory locations, and the function verifies that the register
14774 numbers are themselves ascending. If CHECK_REGS is false, the register
14775 numbers are stored in the order they are found in the operands. */
14776 static int
14777 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14778 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14779 HOST_WIDE_INT *load_offset, bool check_regs)
14780 {
14781 int unsorted_regs[MAX_LDM_STM_OPS];
14782 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14783 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14784 int order[MAX_LDM_STM_OPS];
14785 int base_reg = -1;
14786 rtx base_reg_rtx = NULL;
14787 int i, stm_case;
14788
14789 /* Write back of base register is currently only supported for Thumb 1. */
14790 int base_writeback = TARGET_THUMB1;
14791
14792 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14793 easily extended if required. */
14794 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14795
14796 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14797
14798 /* Loop over the operands and check that the memory references are
14799 suitable (i.e. immediate offsets from the same base register). At
14800 the same time, extract the target register, and the memory
14801 offsets. */
14802 for (i = 0; i < nops; i++)
14803 {
14804 rtx reg;
14805 rtx offset;
14806
14807 /* Convert a subreg of a mem into the mem itself. */
14808 if (GET_CODE (operands[nops + i]) == SUBREG)
14809 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14810
14811 gcc_assert (MEM_P (operands[nops + i]));
14812
14813 /* Don't reorder volatile memory references; it doesn't seem worth
14814 looking for the case where the order is ok anyway. */
14815 if (MEM_VOLATILE_P (operands[nops + i]))
14816 return 0;
14817
14818 offset = const0_rtx;
14819
14820 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14821 || (SUBREG_P (reg)
14822 && REG_P (reg = SUBREG_REG (reg))))
14823 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14824 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14825 || (SUBREG_P (reg)
14826 && REG_P (reg = SUBREG_REG (reg))))
14827 && (CONST_INT_P (offset
14828 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14829 {
14830 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14831 ? operands[i] : SUBREG_REG (operands[i]));
14832 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14833
14834 if (i == 0)
14835 {
14836 base_reg = REGNO (reg);
14837 base_reg_rtx = reg;
14838 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14839 return 0;
14840 }
14841 else if (base_reg != (int) REGNO (reg))
14842 /* Not addressed from the same base register. */
14843 return 0;
14844
14845 /* If it isn't an integer register, then we can't do this. */
14846 if (unsorted_regs[i] < 0
14847 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14848 /* The effects are unpredictable if the base register is
14849 both updated and stored. */
14850 || (base_writeback && unsorted_regs[i] == base_reg)
14851 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14852 || unsorted_regs[i] > 14)
14853 return 0;
14854
14855 unsorted_offsets[i] = INTVAL (offset);
14856 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14857 order[0] = i;
14858 }
14859 else
14860 /* Not a suitable memory address. */
14861 return 0;
14862 }
14863
14864 /* All the useful information has now been extracted from the
14865 operands into unsorted_regs and unsorted_offsets; additionally,
14866 order[0] has been set to the lowest offset in the list. Sort
14867 the offsets into order, verifying that they are adjacent, and
14868 check that the register numbers are ascending. */
14869 if (!compute_offset_order (nops, unsorted_offsets, order,
14870 check_regs ? unsorted_regs : NULL))
14871 return 0;
14872
14873 if (saved_order)
14874 memcpy (saved_order, order, sizeof order);
14875
14876 if (base)
14877 {
14878 *base = base_reg;
14879
14880 for (i = 0; i < nops; i++)
14881 {
14882 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14883 if (reg_rtxs)
14884 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14885 }
14886
14887 *load_offset = unsorted_offsets[order[0]];
14888 }
14889
14890 if (TARGET_THUMB1
14891 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14892 return 0;
14893
14894 if (unsorted_offsets[order[0]] == 0)
14895 stm_case = 1; /* stmia */
14896 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14897 stm_case = 2; /* stmib */
14898 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14899 stm_case = 3; /* stmda */
14900 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14901 stm_case = 4; /* stmdb */
14902 else
14903 return 0;
14904
14905 if (!multiple_operation_profitable_p (false, nops, 0))
14906 return 0;
14907
14908 return stm_case;
14909 }
14910 \f
14911 /* Routines for use in generating RTL. */
14912
14913 /* Generate a load-multiple instruction. COUNT is the number of loads in
14914 the instruction; REGS and MEMS are arrays containing the operands.
14915 BASEREG is the base register to be used in addressing the memory operands.
14916 WBACK_OFFSET is nonzero if the instruction should update the base
14917 register. */
14918
14919 static rtx
14920 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14921 HOST_WIDE_INT wback_offset)
14922 {
14923 int i = 0, j;
14924 rtx result;
14925
14926 if (!multiple_operation_profitable_p (false, count, 0))
14927 {
14928 rtx seq;
14929
14930 start_sequence ();
14931
14932 for (i = 0; i < count; i++)
14933 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14934
14935 if (wback_offset != 0)
14936 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14937
14938 seq = get_insns ();
14939 end_sequence ();
14940
14941 return seq;
14942 }
14943
14944 result = gen_rtx_PARALLEL (VOIDmode,
14945 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14946 if (wback_offset != 0)
14947 {
14948 XVECEXP (result, 0, 0)
14949 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14950 i = 1;
14951 count++;
14952 }
14953
14954 for (j = 0; i < count; i++, j++)
14955 XVECEXP (result, 0, i)
14956 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14957
14958 return result;
14959 }
14960
14961 /* Generate a store-multiple instruction. COUNT is the number of stores in
14962 the instruction; REGS and MEMS are arrays containing the operands.
14963 BASEREG is the base register to be used in addressing the memory operands.
14964 WBACK_OFFSET is nonzero if the instruction should update the base
14965 register. */
14966
14967 static rtx
14968 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14969 HOST_WIDE_INT wback_offset)
14970 {
14971 int i = 0, j;
14972 rtx result;
14973
14974 if (GET_CODE (basereg) == PLUS)
14975 basereg = XEXP (basereg, 0);
14976
14977 if (!multiple_operation_profitable_p (false, count, 0))
14978 {
14979 rtx seq;
14980
14981 start_sequence ();
14982
14983 for (i = 0; i < count; i++)
14984 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14985
14986 if (wback_offset != 0)
14987 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14988
14989 seq = get_insns ();
14990 end_sequence ();
14991
14992 return seq;
14993 }
14994
14995 result = gen_rtx_PARALLEL (VOIDmode,
14996 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14997 if (wback_offset != 0)
14998 {
14999 XVECEXP (result, 0, 0)
15000 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15001 i = 1;
15002 count++;
15003 }
15004
15005 for (j = 0; i < count; i++, j++)
15006 XVECEXP (result, 0, i)
15007 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15008
15009 return result;
15010 }
15011
15012 /* Generate either a load-multiple or a store-multiple instruction. This
15013 function can be used in situations where we can start with a single MEM
15014 rtx and adjust its address upwards.
15015 COUNT is the number of operations in the instruction, not counting a
15016 possible update of the base register. REGS is an array containing the
15017 register operands.
15018 BASEREG is the base register to be used in addressing the memory operands,
15019 which are constructed from BASEMEM.
15020 WRITE_BACK specifies whether the generated instruction should include an
15021 update of the base register.
15022 OFFSETP is used to pass an offset to and from this function; this offset
15023 is not used when constructing the address (instead BASEMEM should have an
15024 appropriate offset in its address); it is used only for setting
15025 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15026
15027 static rtx
15028 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15029 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15030 {
15031 rtx mems[MAX_LDM_STM_OPS];
15032 HOST_WIDE_INT offset = *offsetp;
15033 int i;
15034
15035 gcc_assert (count <= MAX_LDM_STM_OPS);
15036
15037 if (GET_CODE (basereg) == PLUS)
15038 basereg = XEXP (basereg, 0);
15039
15040 for (i = 0; i < count; i++)
15041 {
15042 rtx addr = plus_constant (Pmode, basereg, i * 4);
15043 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15044 offset += 4;
15045 }
15046
15047 if (write_back)
15048 *offsetp = offset;
15049
15050 if (is_load)
15051 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15052 write_back ? 4 * count : 0);
15053 else
15054 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15055 write_back ? 4 * count : 0);
15056 }
15057
15058 rtx
15059 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15060 rtx basemem, HOST_WIDE_INT *offsetp)
15061 {
15062 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15063 offsetp);
15064 }
15065
15066 rtx
15067 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15068 rtx basemem, HOST_WIDE_INT *offsetp)
15069 {
15070 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15071 offsetp);
15072 }
15073
15074 /* Called from a peephole2 expander to turn a sequence of loads into an
15075 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15076 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15077 is true if we can reorder the registers because their subsequent uses
15078 are commutative.
15079 Returns true iff we could generate a new instruction. */
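/* A typical transformation (registers and offsets purely illustrative):
     ldr r0, [r4]
     ldr r1, [r4, #4]      =>      ldmia r4, {r0, r1, r2}
     ldr r2, [r4, #8]
   subject to the register numbers being in (or being allowed to be
   reordered into) ascending order.  */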
15080
15081 bool
15082 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15083 {
15084 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15085 rtx mems[MAX_LDM_STM_OPS];
15086 int i, j, base_reg;
15087 rtx base_reg_rtx;
15088 HOST_WIDE_INT offset;
15089 int write_back = FALSE;
15090 int ldm_case;
15091 rtx addr;
15092
15093 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15094 &base_reg, &offset, !sort_regs);
15095
15096 if (ldm_case == 0)
15097 return false;
15098
15099 if (sort_regs)
15100 for (i = 0; i < nops - 1; i++)
15101 for (j = i + 1; j < nops; j++)
15102 if (regs[i] > regs[j])
15103 {
15104 int t = regs[i];
15105 regs[i] = regs[j];
15106 regs[j] = t;
15107 }
15108 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15109
15110 if (TARGET_THUMB1)
15111 {
15112 gcc_assert (ldm_case == 1 || ldm_case == 5);
15113
15114 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15115 write_back = true;
15116 for (i = 0; i < nops; i++)
15117 if (base_reg == regs[i])
15118 write_back = false;
15119
15120 /* Ensure the base is dead if it is updated. */
15121 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15122 return false;
15123 }
15124
15125 if (ldm_case == 5)
15126 {
15127 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15128 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15129 offset = 0;
15130 base_reg_rtx = newbase;
15131 }
15132
15133 for (i = 0; i < nops; i++)
15134 {
15135 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15136 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15137 SImode, addr, 0);
15138 }
15139 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15140 write_back ? offset + i * 4 : 0));
15141 return true;
15142 }
15143
15144 /* Called from a peephole2 expander to turn a sequence of stores into an
15145 STM instruction. OPERANDS are the operands found by the peephole matcher;
15146 NOPS indicates how many separate stores we are trying to combine.
15147 Returns true iff we could generate a new instruction. */
15148
15149 bool
15150 gen_stm_seq (rtx *operands, int nops)
15151 {
15152 int i;
15153 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15154 rtx mems[MAX_LDM_STM_OPS];
15155 int base_reg;
15156 rtx base_reg_rtx;
15157 HOST_WIDE_INT offset;
15158 int write_back = FALSE;
15159 int stm_case;
15160 rtx addr;
15161 bool base_reg_dies;
15162
15163 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15164 mem_order, &base_reg, &offset, true);
15165
15166 if (stm_case == 0)
15167 return false;
15168
15169 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15170
15171 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15172 if (TARGET_THUMB1)
15173 {
15174 gcc_assert (base_reg_dies);
15175 write_back = TRUE;
15176 }
15177
15178 if (stm_case == 5)
15179 {
15180 gcc_assert (base_reg_dies);
15181 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15182 offset = 0;
15183 }
15184
15185 addr = plus_constant (Pmode, base_reg_rtx, offset);
15186
15187 for (i = 0; i < nops; i++)
15188 {
15189 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15190 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15191 SImode, addr, 0);
15192 }
15193 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15194 write_back ? offset + i * 4 : 0));
15195 return true;
15196 }
15197
15198 /* Called from a peephole2 expander to turn a sequence of stores that are
15199 preceded by constant loads into an STM instruction. OPERANDS are the
15200 operands found by the peephole matcher; NOPS indicates how many
15201 separate stores we are trying to combine; there are 2 * NOPS
15202 instructions in the peephole.
15203 Returns true iff we could generate a new instruction. */
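/* A sketch of the intended transformation (illustrative only):
     mov r0, #0
     str r0, [r4]                  mov r0, #0
     mov r1, #1            =>      mov r1, #1
     str r1, [r4, #4]              stmia r4, {r0, r1}
   i.e. the constant loads are re-emitted first and the individual stores
   collapse into a single stm.  */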
15204
15205 bool
15206 gen_const_stm_seq (rtx *operands, int nops)
15207 {
15208 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15209 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15210 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15211 rtx mems[MAX_LDM_STM_OPS];
15212 int base_reg;
15213 rtx base_reg_rtx;
15214 HOST_WIDE_INT offset;
15215 int write_back = FALSE;
15216 int stm_case;
15217 rtx addr;
15218 bool base_reg_dies;
15219 int i, j;
15220 HARD_REG_SET allocated;
15221
15222 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15223 mem_order, &base_reg, &offset, false);
15224
15225 if (stm_case == 0)
15226 return false;
15227
15228 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15229
15230 /* If the same register is used more than once, try to find a free
15231 register. */
15232 CLEAR_HARD_REG_SET (allocated);
15233 for (i = 0; i < nops; i++)
15234 {
15235 for (j = i + 1; j < nops; j++)
15236 if (regs[i] == regs[j])
15237 {
15238 rtx t = peep2_find_free_register (0, nops * 2,
15239 TARGET_THUMB1 ? "l" : "r",
15240 SImode, &allocated);
15241 if (t == NULL_RTX)
15242 return false;
15243 reg_rtxs[i] = t;
15244 regs[i] = REGNO (t);
15245 }
15246 }
15247
15248 /* Compute an ordering that maps the register numbers to an ascending
15249 sequence. */
15250 reg_order[0] = 0;
15251 for (i = 0; i < nops; i++)
15252 if (regs[i] < regs[reg_order[0]])
15253 reg_order[0] = i;
15254
15255 for (i = 1; i < nops; i++)
15256 {
15257 int this_order = reg_order[i - 1];
15258 for (j = 0; j < nops; j++)
15259 if (regs[j] > regs[reg_order[i - 1]]
15260 && (this_order == reg_order[i - 1]
15261 || regs[j] < regs[this_order]))
15262 this_order = j;
15263 reg_order[i] = this_order;
15264 }
15265
15266 /* Ensure that registers that must be live after the instruction end
15267 up with the correct value. */
15268 for (i = 0; i < nops; i++)
15269 {
15270 int this_order = reg_order[i];
15271 if ((this_order != mem_order[i]
15272 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15273 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15274 return false;
15275 }
15276
15277 /* Load the constants. */
15278 for (i = 0; i < nops; i++)
15279 {
15280 rtx op = operands[2 * nops + mem_order[i]];
15281 sorted_regs[i] = regs[reg_order[i]];
15282 emit_move_insn (reg_rtxs[reg_order[i]], op);
15283 }
15284
15285 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15286
15287 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15288 if (TARGET_THUMB1)
15289 {
15290 gcc_assert (base_reg_dies);
15291 write_back = TRUE;
15292 }
15293
15294 if (stm_case == 5)
15295 {
15296 gcc_assert (base_reg_dies);
15297 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15298 offset = 0;
15299 }
15300
15301 addr = plus_constant (Pmode, base_reg_rtx, offset);
15302
15303 for (i = 0; i < nops; i++)
15304 {
15305 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15306 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15307 SImode, addr, 0);
15308 }
15309 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15310 write_back ? offset + i * 4 : 0));
15311 return true;
15312 }
15313
15314 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15315 unaligned copies on processors which support unaligned semantics for those
15316 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15317 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15318 An interleave factor of 1 (the minimum) will perform no interleaving.
15319 Load/store multiple are used for aligned addresses where possible. */
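/* With an interleave factor of 2 and neither side word-aligned, each
   iteration of the copy looks roughly like (illustrative):
     ldr r0, [src]
     ldr r1, [src, #4]
     str r0, [dst]
     str r1, [dst, #4]
   hiding part of the load latency behind the second load before the stores
   are issued.  */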
15320
15321 static void
15322 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15323 HOST_WIDE_INT length,
15324 unsigned int interleave_factor)
15325 {
15326 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15327 int *regnos = XALLOCAVEC (int, interleave_factor);
15328 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15329 HOST_WIDE_INT i, j;
15330 HOST_WIDE_INT remaining = length, words;
15331 rtx halfword_tmp = NULL, byte_tmp = NULL;
15332 rtx dst, src;
15333 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15334 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15335 HOST_WIDE_INT srcoffset, dstoffset;
15336 HOST_WIDE_INT src_autoinc, dst_autoinc;
15337 rtx mem, addr;
15338
15339 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15340
15341 /* Use hard registers if we have aligned source or destination so we can use
15342 load/store multiple with contiguous registers. */
15343 if (dst_aligned || src_aligned)
15344 for (i = 0; i < interleave_factor; i++)
15345 regs[i] = gen_rtx_REG (SImode, i);
15346 else
15347 for (i = 0; i < interleave_factor; i++)
15348 regs[i] = gen_reg_rtx (SImode);
15349
15350 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15351 src = copy_addr_to_reg (XEXP (srcbase, 0));
15352
15353 srcoffset = dstoffset = 0;
15354
15355 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15356 For copying the last bytes we want to subtract this offset again. */
15357 src_autoinc = dst_autoinc = 0;
15358
15359 for (i = 0; i < interleave_factor; i++)
15360 regnos[i] = i;
15361
15362 /* Copy BLOCK_SIZE_BYTES chunks. */
15363
15364 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15365 {
15366 /* Load words. */
15367 if (src_aligned && interleave_factor > 1)
15368 {
15369 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15370 TRUE, srcbase, &srcoffset));
15371 src_autoinc += UNITS_PER_WORD * interleave_factor;
15372 }
15373 else
15374 {
15375 for (j = 0; j < interleave_factor; j++)
15376 {
15377 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15378 - src_autoinc));
15379 mem = adjust_automodify_address (srcbase, SImode, addr,
15380 srcoffset + j * UNITS_PER_WORD);
15381 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15382 }
15383 srcoffset += block_size_bytes;
15384 }
15385
15386 /* Store words. */
15387 if (dst_aligned && interleave_factor > 1)
15388 {
15389 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15390 TRUE, dstbase, &dstoffset));
15391 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15392 }
15393 else
15394 {
15395 for (j = 0; j < interleave_factor; j++)
15396 {
15397 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15398 - dst_autoinc));
15399 mem = adjust_automodify_address (dstbase, SImode, addr,
15400 dstoffset + j * UNITS_PER_WORD);
15401 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15402 }
15403 dstoffset += block_size_bytes;
15404 }
15405
15406 remaining -= block_size_bytes;
15407 }
15408
15409 /* Copy any whole words left (note these aren't interleaved with any
15410 subsequent halfword/byte load/stores in the interests of simplicity). */
15411
15412 words = remaining / UNITS_PER_WORD;
15413
15414 gcc_assert (words < interleave_factor);
15415
15416 if (src_aligned && words > 1)
15417 {
15418 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15419 &srcoffset));
15420 src_autoinc += UNITS_PER_WORD * words;
15421 }
15422 else
15423 {
15424 for (j = 0; j < words; j++)
15425 {
15426 addr = plus_constant (Pmode, src,
15427 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15428 mem = adjust_automodify_address (srcbase, SImode, addr,
15429 srcoffset + j * UNITS_PER_WORD);
15430 if (src_aligned)
15431 emit_move_insn (regs[j], mem);
15432 else
15433 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15434 }
15435 srcoffset += words * UNITS_PER_WORD;
15436 }
15437
15438 if (dst_aligned && words > 1)
15439 {
15440 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15441 &dstoffset));
15442 dst_autoinc += words * UNITS_PER_WORD;
15443 }
15444 else
15445 {
15446 for (j = 0; j < words; j++)
15447 {
15448 addr = plus_constant (Pmode, dst,
15449 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15450 mem = adjust_automodify_address (dstbase, SImode, addr,
15451 dstoffset + j * UNITS_PER_WORD);
15452 if (dst_aligned)
15453 emit_move_insn (mem, regs[j]);
15454 else
15455 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15456 }
15457 dstoffset += words * UNITS_PER_WORD;
15458 }
15459
15460 remaining -= words * UNITS_PER_WORD;
15461
15462 gcc_assert (remaining < 4);
15463
15464 /* Copy a halfword if necessary. */
15465
15466 if (remaining >= 2)
15467 {
15468 halfword_tmp = gen_reg_rtx (SImode);
15469
15470 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15471 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15472 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15473
15474 /* Either write out immediately, or delay until we've loaded the last
15475 byte, depending on interleave factor. */
15476 if (interleave_factor == 1)
15477 {
15478 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15479 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15480 emit_insn (gen_unaligned_storehi (mem,
15481 gen_lowpart (HImode, halfword_tmp)));
15482 halfword_tmp = NULL;
15483 dstoffset += 2;
15484 }
15485
15486 remaining -= 2;
15487 srcoffset += 2;
15488 }
15489
15490 gcc_assert (remaining < 2);
15491
15492 /* Copy last byte. */
15493
15494 if ((remaining & 1) != 0)
15495 {
15496 byte_tmp = gen_reg_rtx (SImode);
15497
15498 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15499 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15500 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15501
15502 if (interleave_factor == 1)
15503 {
15504 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15505 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15506 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15507 byte_tmp = NULL;
15508 dstoffset++;
15509 }
15510
15511 remaining--;
15512 srcoffset++;
15513 }
15514
15515 /* Store last halfword if we haven't done so already. */
15516
15517 if (halfword_tmp)
15518 {
15519 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15520 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15521 emit_insn (gen_unaligned_storehi (mem,
15522 gen_lowpart (HImode, halfword_tmp)));
15523 dstoffset += 2;
15524 }
15525
15526 /* Likewise for last byte. */
15527
15528 if (byte_tmp)
15529 {
15530 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15531 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15532 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15533 dstoffset++;
15534 }
15535
15536 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15537 }
15538
15539 /* From mips_adjust_block_mem:
15540
15541 Helper function for doing a loop-based block operation on memory
15542 reference MEM. Each iteration of the loop will operate on LENGTH
15543 bytes of MEM.
15544
15545 Create a new base register for use within the loop and point it to
15546 the start of MEM. Create a new memory reference that uses this
15547 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15548
15549 static void
15550 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15551 rtx *loop_mem)
15552 {
15553 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15554
15555 /* Although the new mem does not refer to a known location,
15556 it does keep up to LENGTH bytes of alignment. */
15557 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15558 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15559 }
15560
15561 /* From mips_block_move_loop:
15562
15563 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15564 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15565 the memory regions do not overlap. */
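/* Schematically (illustrative), for LENGTH = 100 and BYTES_PER_ITER = 16
   the emitted code is a loop that copies 16 bytes per iteration until
   SRC_REG reaches SRC + 96, followed by a straight-line copy of the 4
   left-over bytes.  */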
15566
15567 static void
15568 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15569 unsigned int interleave_factor,
15570 HOST_WIDE_INT bytes_per_iter)
15571 {
15572 rtx src_reg, dest_reg, final_src, test;
15573 HOST_WIDE_INT leftover;
15574
15575 leftover = length % bytes_per_iter;
15576 length -= leftover;
15577
15578 /* Create registers and memory references for use within the loop. */
15579 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15580 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15581
15582 /* Calculate the value that SRC_REG should have after the last iteration of
15583 the loop. */
15584 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15585 0, 0, OPTAB_WIDEN);
15586
15587 /* Emit the start of the loop. */
15588 rtx_code_label *label = gen_label_rtx ();
15589 emit_label (label);
15590
15591 /* Emit the loop body. */
15592 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15593 interleave_factor);
15594
15595 /* Move on to the next block. */
15596 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15597 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15598
15599 /* Emit the loop condition. */
15600 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15601 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15602
15603 /* Mop up any left-over bytes. */
15604 if (leftover)
15605 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15606 }
15607
15608 /* Emit a block move when either the source or destination is unaligned (not
15609 aligned to a four-byte boundary). This may need further tuning depending on
15610 core type, optimize_size setting, etc. */
15611
15612 static int
15613 arm_cpymemqi_unaligned (rtx *operands)
15614 {
15615 HOST_WIDE_INT length = INTVAL (operands[2]);
15616
15617 if (optimize_size)
15618 {
15619 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15620 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15621 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15622 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15623 or dst_aligned though: allow more interleaving in those cases since the
15624 resulting code can be smaller. */
15625 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15626 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15627
15628 if (length > 12)
15629 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15630 interleave_factor, bytes_per_iter);
15631 else
15632 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15633 interleave_factor);
15634 }
15635 else
15636 {
15637 /* Note that the loop created by arm_block_move_unaligned_loop may be
15638 subject to loop unrolling, which makes tuning this condition a little
15639 redundant. */
15640 if (length > 32)
15641 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15642 else
15643 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15644 }
15645
15646 return 1;
15647 }
15648
15649 int
15650 arm_gen_cpymemqi (rtx *operands)
15651 {
15652 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15653 HOST_WIDE_INT srcoffset, dstoffset;
15654 rtx src, dst, srcbase, dstbase;
15655 rtx part_bytes_reg = NULL;
15656 rtx mem;
15657
15658 if (!CONST_INT_P (operands[2])
15659 || !CONST_INT_P (operands[3])
15660 || INTVAL (operands[2]) > 64)
15661 return 0;
15662
15663 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15664 return arm_cpymemqi_unaligned (operands);
15665
15666 if (INTVAL (operands[3]) & 3)
15667 return 0;
15668
15669 dstbase = operands[0];
15670 srcbase = operands[1];
15671
15672 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15673 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15674
15675 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15676 out_words_to_go = INTVAL (operands[2]) / 4;
15677 last_bytes = INTVAL (operands[2]) & 3;
15678 dstoffset = srcoffset = 0;
15679
15680 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15681 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15682
15683 while (in_words_to_go >= 2)
15684 {
15685 if (in_words_to_go > 4)
15686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15687 TRUE, srcbase, &srcoffset));
15688 else
15689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15690 src, FALSE, srcbase,
15691 &srcoffset));
15692
15693 if (out_words_to_go)
15694 {
15695 if (out_words_to_go > 4)
15696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15697 TRUE, dstbase, &dstoffset));
15698 else if (out_words_to_go != 1)
15699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15700 out_words_to_go, dst,
15701 (last_bytes == 0
15702 ? FALSE : TRUE),
15703 dstbase, &dstoffset));
15704 else
15705 {
15706 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15707 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15708 if (last_bytes != 0)
15709 {
15710 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15711 dstoffset += 4;
15712 }
15713 }
15714 }
15715
15716 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15717 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15718 }
15719
15720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15721 if (out_words_to_go)
15722 {
15723 rtx sreg;
15724
15725 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15726 sreg = copy_to_reg (mem);
15727
15728 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15729 emit_move_insn (mem, sreg);
15730 in_words_to_go--;
15731
15732 gcc_assert (!in_words_to_go); /* Sanity check */
15733 }
15734
15735 if (in_words_to_go)
15736 {
15737 gcc_assert (in_words_to_go > 0);
15738
15739 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15740 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15741 }
15742
15743 gcc_assert (!last_bytes || part_bytes_reg);
15744
15745 if (BYTES_BIG_ENDIAN && last_bytes)
15746 {
15747 rtx tmp = gen_reg_rtx (SImode);
15748
15749 /* The bytes we want are in the top end of the word. */
15750 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15751 GEN_INT (8 * (4 - last_bytes))));
15752 part_bytes_reg = tmp;
15753
15754 while (last_bytes)
15755 {
15756 mem = adjust_automodify_address (dstbase, QImode,
15757 plus_constant (Pmode, dst,
15758 last_bytes - 1),
15759 dstoffset + last_bytes - 1);
15760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15761
15762 if (--last_bytes)
15763 {
15764 tmp = gen_reg_rtx (SImode);
15765 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15766 part_bytes_reg = tmp;
15767 }
15768 }
15769
15770 }
15771 else
15772 {
15773 if (last_bytes > 1)
15774 {
15775 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15776 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15777 last_bytes -= 2;
15778 if (last_bytes)
15779 {
15780 rtx tmp = gen_reg_rtx (SImode);
15781 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15782 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15783 part_bytes_reg = tmp;
15784 dstoffset += 2;
15785 }
15786 }
15787
15788 if (last_bytes)
15789 {
15790 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15791 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15792 }
15793 }
15794
15795 return 1;
15796 }
15797
15798 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15799 by mode size. */
15800 inline static rtx
15801 next_consecutive_mem (rtx mem)
15802 {
15803 machine_mode mode = GET_MODE (mem);
15804 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15805 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15806
15807 return adjust_automodify_address (mem, mode, addr, offset);
15808 }
15809
15810 /* Copy using LDRD/STRD instructions whenever possible.
15811 Returns true upon success. */
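/* For a doubleword-aligned 16-byte copy this reduces roughly to
   (illustrative; the register pair is really a DImode pseudo):
     ldrd r0, r1, [src]
     strd r0, r1, [dst]
     ldrd r0, r1, [src, #8]
     strd r0, r1, [dst, #8]
   while misaligned buffers fall back to pairs of unaligned ldr/str or, if
   neither side is word-aligned, to arm_gen_cpymemqi.  */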
15812 bool
15813 gen_cpymem_ldrd_strd (rtx *operands)
15814 {
15815 unsigned HOST_WIDE_INT len;
15816 HOST_WIDE_INT align;
15817 rtx src, dst, base;
15818 rtx reg0;
15819 bool src_aligned, dst_aligned;
15820 bool src_volatile, dst_volatile;
15821
15822 gcc_assert (CONST_INT_P (operands[2]));
15823 gcc_assert (CONST_INT_P (operands[3]));
15824
15825 len = UINTVAL (operands[2]);
15826 if (len > 64)
15827 return false;
15828
15829 /* Maximum alignment we can assume for both src and dst buffers. */
15830 align = INTVAL (operands[3]);
15831
15832 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15833 return false;
15834
15835 /* Place src and dst addresses in registers
15836 and update the corresponding mem rtx. */
15837 dst = operands[0];
15838 dst_volatile = MEM_VOLATILE_P (dst);
15839 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15840 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15841 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15842
15843 src = operands[1];
15844 src_volatile = MEM_VOLATILE_P (src);
15845 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15846 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15847 src = adjust_automodify_address (src, VOIDmode, base, 0);
15848
15849 if (!unaligned_access && !(src_aligned && dst_aligned))
15850 return false;
15851
15852 if (src_volatile || dst_volatile)
15853 return false;
15854
15855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15856 if (!(dst_aligned || src_aligned))
15857 return arm_gen_cpymemqi (operands);
15858
15859 /* If either the src or the dst is unaligned, we'll access it as pairs
15860 of unaligned SImode accesses. Otherwise we can generate DImode
15861 ldrd/strd instructions. */
15862 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15863 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15864
15865 while (len >= 8)
15866 {
15867 len -= 8;
15868 reg0 = gen_reg_rtx (DImode);
15869 rtx first_reg = NULL_RTX;
15870 rtx second_reg = NULL_RTX;
15871
15872 if (!src_aligned || !dst_aligned)
15873 {
15874 if (BYTES_BIG_ENDIAN)
15875 {
15876 second_reg = gen_lowpart (SImode, reg0);
15877 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15878 }
15879 else
15880 {
15881 first_reg = gen_lowpart (SImode, reg0);
15882 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15883 }
15884 }
15885 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15886 emit_move_insn (reg0, src);
15887 else if (src_aligned)
15888 emit_insn (gen_unaligned_loaddi (reg0, src));
15889 else
15890 {
15891 emit_insn (gen_unaligned_loadsi (first_reg, src));
15892 src = next_consecutive_mem (src);
15893 emit_insn (gen_unaligned_loadsi (second_reg, src));
15894 }
15895
15896 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15897 emit_move_insn (dst, reg0);
15898 else if (dst_aligned)
15899 emit_insn (gen_unaligned_storedi (dst, reg0));
15900 else
15901 {
15902 emit_insn (gen_unaligned_storesi (dst, first_reg));
15903 dst = next_consecutive_mem (dst);
15904 emit_insn (gen_unaligned_storesi (dst, second_reg));
15905 }
15906
15907 src = next_consecutive_mem (src);
15908 dst = next_consecutive_mem (dst);
15909 }
15910
15911 gcc_assert (len < 8);
15912 if (len >= 4)
15913 {
15914 /* More than a word but less than a double-word to copy. Copy a word. */
15915 reg0 = gen_reg_rtx (SImode);
15916 src = adjust_address (src, SImode, 0);
15917 dst = adjust_address (dst, SImode, 0);
15918 if (src_aligned)
15919 emit_move_insn (reg0, src);
15920 else
15921 emit_insn (gen_unaligned_loadsi (reg0, src));
15922
15923 if (dst_aligned)
15924 emit_move_insn (dst, reg0);
15925 else
15926 emit_insn (gen_unaligned_storesi (dst, reg0));
15927
15928 src = next_consecutive_mem (src);
15929 dst = next_consecutive_mem (dst);
15930 len -= 4;
15931 }
15932
15933 if (len == 0)
15934 return true;
15935
15936 /* Copy the remaining bytes. */
15937 if (len >= 2)
15938 {
15939 dst = adjust_address (dst, HImode, 0);
15940 src = adjust_address (src, HImode, 0);
15941 reg0 = gen_reg_rtx (SImode);
15942 if (src_aligned)
15943 emit_insn (gen_zero_extendhisi2 (reg0, src));
15944 else
15945 emit_insn (gen_unaligned_loadhiu (reg0, src));
15946
15947 if (dst_aligned)
15948 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
15949 else
15950 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15951
15952 src = next_consecutive_mem (src);
15953 dst = next_consecutive_mem (dst);
15954 if (len == 2)
15955 return true;
15956 }
15957
15958 dst = adjust_address (dst, QImode, 0);
15959 src = adjust_address (src, QImode, 0);
15960 reg0 = gen_reg_rtx (QImode);
15961 emit_move_insn (reg0, src);
15962 emit_move_insn (dst, reg0);
15963 return true;
15964 }
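/* As a rough illustration (the exact output depends on alignment, options
   and register allocation; register names below are placeholders), a
   15-byte copy between pointers known to be doubleword aligned decomposes
   as above into

     ldrd  r4, r5, [r1]        @ 8 bytes via one DImode move
     strd  r4, r5, [r0]
     ldr   r4, [r1, #8]        @ remaining word
     str   r4, [r0, #8]
     ldrh  r4, [r1, #12]       @ remaining halfword
     strh  r4, [r0, #12]
     ldrb  r4, [r1, #14]       @ trailing byte
     strb  r4, [r0, #14]

   while word-only alignment with unaligned access enabled falls back to
   pairs of unaligned SImode accesses for each 8-byte chunk.  */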
15965
15966 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15967 into its component 32-bit subregs. OP2 may be an immediate
15968 constant and we want to simplify it in that case. */
15969 void
15970 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15971 rtx *lo_op2, rtx *hi_op2)
15972 {
15973 *lo_op1 = gen_lowpart (SImode, op1);
15974 *hi_op1 = gen_highpart (SImode, op1);
15975 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15976 subreg_lowpart_offset (SImode, DImode));
15977 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15978 subreg_highpart_offset (SImode, DImode));
15979 }
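/* For example (an illustrative sketch): with OP1 a DImode pseudo and
   OP2 = (const_int 0x100000005), the outputs are the two SImode halves of
   OP1 (its lowpart and highpart) together with the folded constant halves,
   (const_int 5) for *LO_OP2 and (const_int 1) for *HI_OP2, rather than
   SUBREGs wrapped around the original constant.  */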
15980
15981 /* Select a dominance comparison mode if possible for a test of the general
15982 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15983 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15984 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15985 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15986 In all cases OP will be either EQ or NE, but we don't need to know which
15987 here. If we are unable to support a dominance comparison we return
15988 CC mode. This will then fail to match for the RTL expressions that
15989 generate this call. */
15990 machine_mode
15991 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15992 {
15993 enum rtx_code cond1, cond2;
15994 int swapped = 0;
15995
15996 /* Currently we will probably get the wrong result if the individual
15997 comparisons are not simple. This also ensures that it is safe to
15998 reverse a comparison if necessary. */
15999 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
16000 != CCmode)
16001 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16002 != CCmode))
16003 return CCmode;
16004
16005 /* The if_then_else variant of this tests the second condition if the
16006 first passes, but is true if the first fails. Reverse the first
16007 condition to get a true "inclusive-or" expression. */
16008 if (cond_or == DOM_CC_NX_OR_Y)
16009 cond1 = reverse_condition (cond1);
16010
16011 /* If the comparisons are not equal, and one doesn't dominate the other,
16012 then we can't do this. */
16013 if (cond1 != cond2
16014 && !comparison_dominates_p (cond1, cond2)
16015 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16016 return CCmode;
16017
16018 if (swapped)
16019 std::swap (cond1, cond2);
16020
16021 switch (cond1)
16022 {
16023 case EQ:
16024 if (cond_or == DOM_CC_X_AND_Y)
16025 return CC_DEQmode;
16026
16027 switch (cond2)
16028 {
16029 case EQ: return CC_DEQmode;
16030 case LE: return CC_DLEmode;
16031 case LEU: return CC_DLEUmode;
16032 case GE: return CC_DGEmode;
16033 case GEU: return CC_DGEUmode;
16034 default: gcc_unreachable ();
16035 }
16036
16037 case LT:
16038 if (cond_or == DOM_CC_X_AND_Y)
16039 return CC_DLTmode;
16040
16041 switch (cond2)
16042 {
16043 case LT:
16044 return CC_DLTmode;
16045 case LE:
16046 return CC_DLEmode;
16047 case NE:
16048 return CC_DNEmode;
16049 default:
16050 gcc_unreachable ();
16051 }
16052
16053 case GT:
16054 if (cond_or == DOM_CC_X_AND_Y)
16055 return CC_DGTmode;
16056
16057 switch (cond2)
16058 {
16059 case GT:
16060 return CC_DGTmode;
16061 case GE:
16062 return CC_DGEmode;
16063 case NE:
16064 return CC_DNEmode;
16065 default:
16066 gcc_unreachable ();
16067 }
16068
16069 case LTU:
16070 if (cond_or == DOM_CC_X_AND_Y)
16071 return CC_DLTUmode;
16072
16073 switch (cond2)
16074 {
16075 case LTU:
16076 return CC_DLTUmode;
16077 case LEU:
16078 return CC_DLEUmode;
16079 case NE:
16080 return CC_DNEmode;
16081 default:
16082 gcc_unreachable ();
16083 }
16084
16085 case GTU:
16086 if (cond_or == DOM_CC_X_AND_Y)
16087 return CC_DGTUmode;
16088
16089 switch (cond2)
16090 {
16091 case GTU:
16092 return CC_DGTUmode;
16093 case GEU:
16094 return CC_DGEUmode;
16095 case NE:
16096 return CC_DNEmode;
16097 default:
16098 gcc_unreachable ();
16099 }
16100
16101 /* The remaining cases only occur when both comparisons are the
16102 same. */
16103 case NE:
16104 gcc_assert (cond1 == cond2);
16105 return CC_DNEmode;
16106
16107 case LE:
16108 gcc_assert (cond1 == cond2);
16109 return CC_DLEmode;
16110
16111 case GE:
16112 gcc_assert (cond1 == cond2);
16113 return CC_DGEmode;
16114
16115 case LEU:
16116 gcc_assert (cond1 == cond2);
16117 return CC_DLEUmode;
16118
16119 case GEU:
16120 gcc_assert (cond1 == cond2);
16121 return CC_DGEUmode;
16122
16123 default:
16124 gcc_unreachable ();
16125 }
16126 }
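/* Worked example, following the tables above: for the test
   (ior (lt x1 y1) (le x2 y2)) with COND_OR == DOM_CC_X_OR_Y, cond1 = LT
   dominates cond2 = LE, so CC_DLEmode is returned; the same pair under
   DOM_CC_X_AND_Y yields CC_DLTmode.  If neither condition dominates the
   other (say LT and GEU), CCmode is returned and the caller's pattern
   simply fails to match.  */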
16127
16128 machine_mode
16129 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16130 {
16131 /* All floating point compares return CCFP if it is an equality
16132 comparison, and CCFPE otherwise. */
16133 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16134 {
16135 switch (op)
16136 {
16137 case EQ:
16138 case NE:
16139 case UNORDERED:
16140 case ORDERED:
16141 case UNLT:
16142 case UNLE:
16143 case UNGT:
16144 case UNGE:
16145 case UNEQ:
16146 case LTGT:
16147 return CCFPmode;
16148
16149 case LT:
16150 case LE:
16151 case GT:
16152 case GE:
16153 return CCFPEmode;
16154
16155 default:
16156 gcc_unreachable ();
16157 }
16158 }
16159
16160 /* A compare with a shifted operand. Because of canonicalization, the
16161 comparison will have to be swapped when we emit the assembler. */
16162 if (GET_MODE (y) == SImode
16163 && (REG_P (y) || (SUBREG_P (y)))
16164 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16165 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16166 || GET_CODE (x) == ROTATERT))
16167 return CC_SWPmode;
16168
16169 /* A widened compare of the sum of a value plus a carry against a
16170 constant. This is a representation of RSC. We want to swap the
16171 result of the comparison at output. Not valid if the Z bit is
16172 needed. */
16173 if (GET_MODE (x) == DImode
16174 && GET_CODE (x) == PLUS
16175 && arm_borrow_operation (XEXP (x, 1), DImode)
16176 && CONST_INT_P (y)
16177 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16178 && (op == LE || op == GT))
16179 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16180 && (op == LEU || op == GTU))))
16181 return CC_SWPmode;
16182
16183 /* If X is a constant we want to use CC_RSBmode. This is
16184 non-canonical, but arm_gen_compare_reg uses this to generate the
16185 correct canonical form. */
16186 if (GET_MODE (y) == SImode
16187 && (REG_P (y) || SUBREG_P (y))
16188 && CONST_INT_P (x))
16189 return CC_RSBmode;
16190
16191 /* This operation is performed swapped, but since we only rely on the Z
16192 flag we don't need an additional mode. */
16193 if (GET_MODE (y) == SImode
16194 && (REG_P (y) || (SUBREG_P (y)))
16195 && GET_CODE (x) == NEG
16196 && (op == EQ || op == NE))
16197 return CC_Zmode;
16198
16199 /* This is a special case that is used by combine to allow a
16200 comparison of a shifted byte load to be split into a zero-extend
16201 followed by a comparison of the shifted integer (only valid for
16202 equalities and unsigned inequalities). */
16203 if (GET_MODE (x) == SImode
16204 && GET_CODE (x) == ASHIFT
16205 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16206 && GET_CODE (XEXP (x, 0)) == SUBREG
16207 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16208 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16209 && (op == EQ || op == NE
16210 || op == GEU || op == GTU || op == LTU || op == LEU)
16211 && CONST_INT_P (y))
16212 return CC_Zmode;
16213
16214 /* A construct for a conditional compare, if the false arm contains
16215 0, then both conditions must be true, otherwise either condition
16216 must be true. Not all conditions are possible, so CCmode is
16217 returned if it can't be done. */
16218 if (GET_CODE (x) == IF_THEN_ELSE
16219 && (XEXP (x, 2) == const0_rtx
16220 || XEXP (x, 2) == const1_rtx)
16221 && COMPARISON_P (XEXP (x, 0))
16222 && COMPARISON_P (XEXP (x, 1)))
16223 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16224 INTVAL (XEXP (x, 2)));
16225
16226 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16227 if (GET_CODE (x) == AND
16228 && (op == EQ || op == NE)
16229 && COMPARISON_P (XEXP (x, 0))
16230 && COMPARISON_P (XEXP (x, 1)))
16231 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16232 DOM_CC_X_AND_Y);
16233
16234 if (GET_CODE (x) == IOR
16235 && (op == EQ || op == NE)
16236 && COMPARISON_P (XEXP (x, 0))
16237 && COMPARISON_P (XEXP (x, 1)))
16238 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16239 DOM_CC_X_OR_Y);
16240
16241 /* An operation (on Thumb) where we want to test for a single bit.
16242 This is done by shifting that bit up into the top bit of a
16243 scratch register; we can then branch on the sign bit. */
16244 if (TARGET_THUMB1
16245 && GET_MODE (x) == SImode
16246 && (op == EQ || op == NE)
16247 && GET_CODE (x) == ZERO_EXTRACT
16248 && XEXP (x, 1) == const1_rtx)
16249 return CC_Nmode;
16250
16251 /* For an operation that sets the condition codes as a side-effect, the
16252 V flag is not set correctly, so we can only use comparisons where
16253 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16254 instead.) */
16255 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16256 if (GET_MODE (x) == SImode
16257 && y == const0_rtx
16258 && (op == EQ || op == NE || op == LT || op == GE)
16259 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16260 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16261 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16262 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16263 || GET_CODE (x) == LSHIFTRT
16264 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16265 || GET_CODE (x) == ROTATERT
16266 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16267 return CC_NZmode;
16268
16269 /* A comparison of ~reg with a const is really a special
16270 canonicalization of compare (~const, reg), which is a reverse
16271 subtract operation. We may not get here if CONST is 0, but that
16272 doesn't matter because ~0 isn't a valid immediate for RSB. */
16273 if (GET_MODE (x) == SImode
16274 && GET_CODE (x) == NOT
16275 && CONST_INT_P (y))
16276 return CC_RSBmode;
16277
16278 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16279 return CC_Zmode;
16280
16281 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16282 && GET_CODE (x) == PLUS
16283 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16284 return CC_Cmode;
16285
16286 if (GET_MODE (x) == DImode
16287 && GET_CODE (x) == PLUS
16288 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16289 && CONST_INT_P (y)
16290 && UINTVAL (y) == 0x800000000
16291 && (op == GEU || op == LTU))
16292 return CC_ADCmode;
16293
16294 if (GET_MODE (x) == DImode
16295 && (op == GE || op == LT)
16296 && GET_CODE (x) == SIGN_EXTEND
16297 && ((GET_CODE (y) == PLUS
16298 && arm_borrow_operation (XEXP (y, 0), DImode))
16299 || arm_borrow_operation (y, DImode)))
16300 return CC_NVmode;
16301
16302 if (GET_MODE (x) == DImode
16303 && (op == GEU || op == LTU)
16304 && GET_CODE (x) == ZERO_EXTEND
16305 && ((GET_CODE (y) == PLUS
16306 && arm_borrow_operation (XEXP (y, 0), DImode))
16307 || arm_borrow_operation (y, DImode)))
16308 return CC_Bmode;
16309
16310 if (GET_MODE (x) == DImode
16311 && (op == EQ || op == NE)
16312 && (GET_CODE (x) == PLUS
16313 || GET_CODE (x) == MINUS)
16314 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16315 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16316 && GET_CODE (y) == SIGN_EXTEND
16317 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16318 return CC_Vmode;
16319
16320 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16321 return GET_MODE (x);
16322
16323 return CCmode;
16324 }
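/* A few illustrative mappings of the cases above (not exhaustive; the
   register and constant operands are placeholders):

     (eq (plus r0 r1) (const_int 0))    -> CC_NZmode  (flag-setting ADDS)
     (lt (ashift r0 (const_int 2)) r1)  -> CC_SWPmode (swapped at output)
     (eq (reg:SF s0) (reg:SF s1))       -> CCFPmode
     (lt (reg:SF s0) (reg:SF s1))       -> CCFPEmode
     (eq (not r0) (const_int 10))       -> CC_RSBmode (reverse subtract)
*/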
16325
16326 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16327 the sequence of instructions needed to generate a suitable condition
16328 code register. Return the CC register result. */
16329 static rtx
16330 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16331 {
16332 machine_mode mode;
16333 rtx cc_reg;
16334
16335 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16336 gcc_assert (TARGET_32BIT);
16337 gcc_assert (!CONST_INT_P (x));
16338
16339 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16340 subreg_lowpart_offset (SImode, DImode));
16341 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16342 subreg_highpart_offset (SImode, DImode));
16343 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16344 subreg_lowpart_offset (SImode, DImode));
16345 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16346 subreg_highpart_offset (SImode, DImode));
16347 switch (code)
16348 {
16349 case EQ:
16350 case NE:
16351 {
16352 if (y_lo == const0_rtx || y_hi == const0_rtx)
16353 {
16354 if (y_lo != const0_rtx)
16355 {
16356 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16357
16358 gcc_assert (y_hi == const0_rtx);
16359 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16360 if (!arm_add_operand (y_lo, SImode))
16361 y_lo = force_reg (SImode, y_lo);
16362 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16363 x_lo = scratch2;
16364 }
16365 else if (y_hi != const0_rtx)
16366 {
16367 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16368
16369 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16370 if (!arm_add_operand (y_hi, SImode))
16371 y_hi = force_reg (SImode, y_hi);
16372 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16373 x_hi = scratch2;
16374 }
16375
16376 if (!scratch)
16377 {
16378 gcc_assert (!reload_completed);
16379 scratch = gen_rtx_SCRATCH (SImode);
16380 }
16381
16382 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16383 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16384
16385 rtx set
16386 = gen_rtx_SET (cc_reg,
16387 gen_rtx_COMPARE (CC_NZmode,
16388 gen_rtx_IOR (SImode, x_lo, x_hi),
16389 const0_rtx));
16390 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16391 clobber)));
16392 return cc_reg;
16393 }
16394
16395 if (!arm_add_operand (y_lo, SImode))
16396 y_lo = force_reg (SImode, y_lo);
16397
16398 if (!arm_add_operand (y_hi, SImode))
16399 y_hi = force_reg (SImode, y_hi);
16400
16401 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16402 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16403 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16404 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16405 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16406
16407 emit_insn (gen_rtx_SET (cc_reg,
16408 gen_rtx_COMPARE (mode, conjunction,
16409 const0_rtx)));
16410 return cc_reg;
16411 }
16412
16413 case LT:
16414 case GE:
16415 {
16416 if (y_lo == const0_rtx)
16417 {
16418 /* If the low word of y is 0, then this is simply a normal
16419 compare of the upper words. */
16420 if (!arm_add_operand (y_hi, SImode))
16421 y_hi = force_reg (SImode, y_hi);
16422
16423 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16424 }
16425
16426 if (!arm_add_operand (y_lo, SImode))
16427 y_lo = force_reg (SImode, y_lo);
16428
16429 rtx cmp1
16430 = gen_rtx_LTU (DImode,
16431 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16432 const0_rtx);
16433
16434 if (!scratch)
16435 scratch = gen_rtx_SCRATCH (SImode);
16436
16437 if (!arm_not_operand (y_hi, SImode))
16438 y_hi = force_reg (SImode, y_hi);
16439
16440 rtx_insn *insn;
16441 if (y_hi == const0_rtx)
16442 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16443 cmp1));
16444 else if (CONST_INT_P (y_hi))
16445 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16446 y_hi, cmp1));
16447 else
16448 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16449 cmp1));
16450 return SET_DEST (single_set (insn));
16451 }
16452
16453 case LE:
16454 case GT:
16455 {
16456 /* During expansion, we only expect to get here if y is a
16457 constant that we want to handle, otherwise we should have
16458 swapped the operands already. */
16459 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16460
16461 if (!const_ok_for_arm (INTVAL (y_lo)))
16462 y_lo = force_reg (SImode, y_lo);
16463
16464 /* Perform a reverse subtract and compare. */
16465 rtx cmp1
16466 = gen_rtx_LTU (DImode,
16467 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16468 const0_rtx);
16469 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16470 x_hi, cmp1));
16471 return SET_DEST (single_set (insn));
16472 }
16473
16474 case LTU:
16475 case GEU:
16476 {
16477 if (y_lo == const0_rtx)
16478 {
16479 /* If the low word of y is 0, then this is simply a normal
16480 compare of the upper words. */
16481 if (!arm_add_operand (y_hi, SImode))
16482 y_hi = force_reg (SImode, y_hi);
16483
16484 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16485 }
16486
16487 if (!arm_add_operand (y_lo, SImode))
16488 y_lo = force_reg (SImode, y_lo);
16489
16490 rtx cmp1
16491 = gen_rtx_LTU (DImode,
16492 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16493 const0_rtx);
16494
16495 if (!scratch)
16496 scratch = gen_rtx_SCRATCH (SImode);
16497 if (!arm_not_operand (y_hi, SImode))
16498 y_hi = force_reg (SImode, y_hi);
16499
16500 rtx_insn *insn;
16501 if (y_hi == const0_rtx)
16502 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16503 cmp1));
16504 else if (CONST_INT_P (y_hi))
16505 {
16506 /* Constant is viewed as unsigned when zero-extended. */
16507 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16508 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16509 y_hi, cmp1));
16510 }
16511 else
16512 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16513 cmp1));
16514 return SET_DEST (single_set (insn));
16515 }
16516
16517 case LEU:
16518 case GTU:
16519 {
16520 /* During expansion, we only expect to get here if y is a
16521 constant that we want to handle, otherwise we should have
16522 swapped the operands already. */
16523 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16524
16525 if (!const_ok_for_arm (INTVAL (y_lo)))
16526 y_lo = force_reg (SImode, y_lo);
16527
16528 /* Perform a reverse subtract and compare. */
16529 rtx cmp1
16530 = gen_rtx_LTU (DImode,
16531 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16532 const0_rtx);
16533 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16534 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16535 x_hi, cmp1));
16536 return SET_DEST (single_set (insn));
16537 }
16538
16539 default:
16540 gcc_unreachable ();
16541 }
16542 }
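/* For instance, an EQ/NE test of a DImode value against zero takes the
   first branch above and emits a single flag-setting ORR of the two
   halves, roughly

     orrs  tmp, x_lo, x_hi     @ Z is set iff the whole 64-bit value is zero

   while a signed LT/GE against a non-trivial constant compares the low
   words first and then performs a carry-in (SBC-style) compare of the
   high words.  Register names here are illustrative.  */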
16543
16544 /* X and Y are two things to compare using CODE. Emit the compare insn and
16545 return the rtx for register 0 in the proper mode. */
16546 rtx
16547 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16548 {
16549 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16550 return arm_gen_dicompare_reg (code, x, y, scratch);
16551
16552 machine_mode mode = SELECT_CC_MODE (code, x, y);
16553 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16554 if (mode == CC_RSBmode)
16555 {
16556 if (!scratch)
16557 scratch = gen_rtx_SCRATCH (SImode);
16558 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16559 GEN_INT (~UINTVAL (x)), y));
16560 }
16561 else
16562 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16563
16564 return cc_reg;
16565 }
16566
16567 /* Generate a sequence of insns that will generate the correct return
16568 address mask depending on the physical architecture that the program
16569 is running on. */
16570 rtx
16571 arm_gen_return_addr_mask (void)
16572 {
16573 rtx reg = gen_reg_rtx (Pmode);
16574
16575 emit_insn (gen_return_addr_mask (reg));
16576 return reg;
16577 }
16578
16579 void
16580 arm_reload_in_hi (rtx *operands)
16581 {
16582 rtx ref = operands[1];
16583 rtx base, scratch;
16584 HOST_WIDE_INT offset = 0;
16585
16586 if (SUBREG_P (ref))
16587 {
16588 offset = SUBREG_BYTE (ref);
16589 ref = SUBREG_REG (ref);
16590 }
16591
16592 if (REG_P (ref))
16593 {
16594 /* We have a pseudo which has been spilt onto the stack; there
16595 are two cases here: the first where there is a simple
16596 stack-slot replacement and a second where the stack-slot is
16597 out of range, or is used as a subreg. */
16598 if (reg_equiv_mem (REGNO (ref)))
16599 {
16600 ref = reg_equiv_mem (REGNO (ref));
16601 base = find_replacement (&XEXP (ref, 0));
16602 }
16603 else
16604 /* The slot is out of range, or was dressed up in a SUBREG. */
16605 base = reg_equiv_address (REGNO (ref));
16606
16607 /* PR 62554: If there is no equivalent memory location then just move
16608 the value as an SImode register move. This happens when the target
16609 architecture variant does not have an HImode register move. */
16610 if (base == NULL)
16611 {
16612 gcc_assert (REG_P (operands[0]));
16613 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16614 gen_rtx_SUBREG (SImode, ref, 0)));
16615 return;
16616 }
16617 }
16618 else
16619 base = find_replacement (&XEXP (ref, 0));
16620
16621 /* Handle the case where the address is too complex to be offset by 1. */
16622 if (GET_CODE (base) == MINUS
16623 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16624 {
16625 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16626
16627 emit_set_insn (base_plus, base);
16628 base = base_plus;
16629 }
16630 else if (GET_CODE (base) == PLUS)
16631 {
16632 /* The addend must be CONST_INT, or we would have dealt with it above. */
16633 HOST_WIDE_INT hi, lo;
16634
16635 offset += INTVAL (XEXP (base, 1));
16636 base = XEXP (base, 0);
16637
16638 /* Rework the address into a legal sequence of insns. */
16639 /* Valid range for lo is -4095 -> 4095 */
16640 lo = (offset >= 0
16641 ? (offset & 0xfff)
16642 : -((-offset) & 0xfff));
16643
16644 /* Corner case, if lo is the max offset then we would be out of range
16645 once we have added the additional 1 below, so bump the msb into the
16646 pre-loading insn(s). */
16647 if (lo == 4095)
16648 lo &= 0x7ff;
16649
16650 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16651 ^ (HOST_WIDE_INT) 0x80000000)
16652 - (HOST_WIDE_INT) 0x80000000);
16653
16654 gcc_assert (hi + lo == offset);
16655
16656 if (hi != 0)
16657 {
16658 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16659
16660 /* Get the base address; addsi3 knows how to handle constants
16661 that require more than one insn. */
16662 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16663 base = base_plus;
16664 offset = lo;
16665 }
16666 }
16667
16668 /* Operands[2] may overlap operands[0] (though it won't overlap
16669 operands[1]), that's why we asked for a DImode reg -- so we can
16670 use the bit that does not overlap. */
16671 if (REGNO (operands[2]) == REGNO (operands[0]))
16672 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16673 else
16674 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16675
16676 emit_insn (gen_zero_extendqisi2 (scratch,
16677 gen_rtx_MEM (QImode,
16678 plus_constant (Pmode, base,
16679 offset))));
16680 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16681 gen_rtx_MEM (QImode,
16682 plus_constant (Pmode, base,
16683 offset + 1))));
16684 if (!BYTES_BIG_ENDIAN)
16685 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16686 gen_rtx_IOR (SImode,
16687 gen_rtx_ASHIFT
16688 (SImode,
16689 gen_rtx_SUBREG (SImode, operands[0], 0),
16690 GEN_INT (8)),
16691 scratch));
16692 else
16693 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16694 gen_rtx_IOR (SImode,
16695 gen_rtx_ASHIFT (SImode, scratch,
16696 GEN_INT (8)),
16697 gen_rtx_SUBREG (SImode, operands[0], 0)));
16698 }
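/* On a little-endian target the sequence built above amounts to

     ldrb  scratch, [base, #offset]
     ldrb  dest,    [base, #offset + 1]
     orr   dest, scratch, dest, lsl #8

   i.e. the halfword is reassembled from two byte loads; big-endian swaps
   which byte is shifted.  (Register names are illustrative.)  */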
16699
16700 /* Handle storing a half-word to memory during reload by synthesizing as two
16701 byte stores. Take care not to clobber the input values until after we
16702 have moved them somewhere safe. This code assumes that if the DImode
16703 scratch in operands[2] overlaps either the input value or output address
16704 in some way, then that value must die in this insn (we absolutely need
16705 two scratch registers for some corner cases). */
16706 void
16707 arm_reload_out_hi (rtx *operands)
16708 {
16709 rtx ref = operands[0];
16710 rtx outval = operands[1];
16711 rtx base, scratch;
16712 HOST_WIDE_INT offset = 0;
16713
16714 if (SUBREG_P (ref))
16715 {
16716 offset = SUBREG_BYTE (ref);
16717 ref = SUBREG_REG (ref);
16718 }
16719
16720 if (REG_P (ref))
16721 {
16722 /* We have a pseudo which has been spilt onto the stack; there
16723 are two cases here: the first where there is a simple
16724 stack-slot replacement and a second where the stack-slot is
16725 out of range, or is used as a subreg. */
16726 if (reg_equiv_mem (REGNO (ref)))
16727 {
16728 ref = reg_equiv_mem (REGNO (ref));
16729 base = find_replacement (&XEXP (ref, 0));
16730 }
16731 else
16732 /* The slot is out of range, or was dressed up in a SUBREG. */
16733 base = reg_equiv_address (REGNO (ref));
16734
16735 /* PR 62254: If there is no equivalent memory location then just move
16736 the value as an SImode register move. This happens when the target
16737 architecture variant does not have an HImode register move. */
16738 if (base == NULL)
16739 {
16740 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16741
16742 if (REG_P (outval))
16743 {
16744 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16745 gen_rtx_SUBREG (SImode, outval, 0)));
16746 }
16747 else /* SUBREG_P (outval) */
16748 {
16749 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16750 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16751 SUBREG_REG (outval)));
16752 else
16753 /* FIXME: Handle other cases ? */
16754 gcc_unreachable ();
16755 }
16756 return;
16757 }
16758 }
16759 else
16760 base = find_replacement (&XEXP (ref, 0));
16761
16762 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16763
16764 /* Handle the case where the address is too complex to be offset by 1. */
16765 if (GET_CODE (base) == MINUS
16766 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16767 {
16768 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16769
16770 /* Be careful not to destroy OUTVAL. */
16771 if (reg_overlap_mentioned_p (base_plus, outval))
16772 {
16773 /* Updating base_plus might destroy outval, see if we can
16774 swap the scratch and base_plus. */
16775 if (!reg_overlap_mentioned_p (scratch, outval))
16776 std::swap (scratch, base_plus);
16777 else
16778 {
16779 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16780
16781 /* Be conservative and copy OUTVAL into the scratch now,
16782 this should only be necessary if outval is a subreg
16783 of something larger than a word. */
16784 /* XXX Might this clobber base? I can't see how it can,
16785 since scratch is known to overlap with OUTVAL, and
16786 must be wider than a word. */
16787 emit_insn (gen_movhi (scratch_hi, outval));
16788 outval = scratch_hi;
16789 }
16790 }
16791
16792 emit_set_insn (base_plus, base);
16793 base = base_plus;
16794 }
16795 else if (GET_CODE (base) == PLUS)
16796 {
16797 /* The addend must be CONST_INT, or we would have dealt with it above. */
16798 HOST_WIDE_INT hi, lo;
16799
16800 offset += INTVAL (XEXP (base, 1));
16801 base = XEXP (base, 0);
16802
16803 /* Rework the address into a legal sequence of insns. */
16804 /* Valid range for lo is -4095 -> 4095 */
16805 lo = (offset >= 0
16806 ? (offset & 0xfff)
16807 : -((-offset) & 0xfff));
16808
16809 /* Corner case, if lo is the max offset then we would be out of range
16810 once we have added the additional 1 below, so bump the msb into the
16811 pre-loading insn(s). */
16812 if (lo == 4095)
16813 lo &= 0x7ff;
16814
16815 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16816 ^ (HOST_WIDE_INT) 0x80000000)
16817 - (HOST_WIDE_INT) 0x80000000);
16818
16819 gcc_assert (hi + lo == offset);
16820
16821 if (hi != 0)
16822 {
16823 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16824
16825 /* Be careful not to destroy OUTVAL. */
16826 if (reg_overlap_mentioned_p (base_plus, outval))
16827 {
16828 /* Updating base_plus might destroy outval, see if we
16829 can swap the scratch and base_plus. */
16830 if (!reg_overlap_mentioned_p (scratch, outval))
16831 std::swap (scratch, base_plus);
16832 else
16833 {
16834 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16835
16836 /* Be conservative and copy outval into scratch now,
16837 this should only be necessary if outval is a
16838 subreg of something larger than a word. */
16839 /* XXX Might this clobber base? I can't see how it
16840 can, since scratch is known to overlap with
16841 outval. */
16842 emit_insn (gen_movhi (scratch_hi, outval));
16843 outval = scratch_hi;
16844 }
16845 }
16846
16847 /* Get the base address; addsi3 knows how to handle constants
16848 that require more than one insn. */
16849 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16850 base = base_plus;
16851 offset = lo;
16852 }
16853 }
16854
16855 if (BYTES_BIG_ENDIAN)
16856 {
16857 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16858 plus_constant (Pmode, base,
16859 offset + 1)),
16860 gen_lowpart (QImode, outval)));
16861 emit_insn (gen_lshrsi3 (scratch,
16862 gen_rtx_SUBREG (SImode, outval, 0),
16863 GEN_INT (8)));
16864 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16865 offset)),
16866 gen_lowpart (QImode, scratch)));
16867 }
16868 else
16869 {
16870 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16871 offset)),
16872 gen_lowpart (QImode, outval)));
16873 emit_insn (gen_lshrsi3 (scratch,
16874 gen_rtx_SUBREG (SImode, outval, 0),
16875 GEN_INT (8)));
16876 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16877 plus_constant (Pmode, base,
16878 offset + 1)),
16879 gen_lowpart (QImode, scratch)));
16880 }
16881 }
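/* The mirror image of arm_reload_in_hi: on a little-endian target the
   store is synthesized roughly as

     strb  outval,  [base, #offset]
     lsr   scratch, outval, #8
     strb  scratch, [base, #offset + 1]

   with the two byte stores swapped for big-endian.  (Register names are
   illustrative.)  */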
16882
16883 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16884 (padded to the size of a word) should be passed in a register. */
16885
16886 static bool
16887 arm_must_pass_in_stack (const function_arg_info &arg)
16888 {
16889 if (TARGET_AAPCS_BASED)
16890 return must_pass_in_stack_var_size (arg);
16891 else
16892 return must_pass_in_stack_var_size_or_pad (arg);
16893 }
16894
16895
16896 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16897 byte of a stack argument has useful data. For legacy APCS ABIs we use
16898 the default. For AAPCS-based ABIs small aggregate types are placed
16899 at the lowest memory address. */
16900
16901 static pad_direction
16902 arm_function_arg_padding (machine_mode mode, const_tree type)
16903 {
16904 if (!TARGET_AAPCS_BASED)
16905 return default_function_arg_padding (mode, type);
16906
16907 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16908 return PAD_DOWNWARD;
16909
16910 return PAD_UPWARD;
16911 }
16912
16913
16914 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16915 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16916 register has useful data, and return the opposite if the most
16917 significant byte does. */
16918
16919 bool
16920 arm_pad_reg_upward (machine_mode mode,
16921 tree type, int first ATTRIBUTE_UNUSED)
16922 {
16923 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16924 {
16925 /* For AAPCS, small aggregates, small fixed-point types,
16926 and small complex types are always padded upwards. */
16927 if (type)
16928 {
16929 if ((AGGREGATE_TYPE_P (type)
16930 || TREE_CODE (type) == COMPLEX_TYPE
16931 || FIXED_POINT_TYPE_P (type))
16932 && int_size_in_bytes (type) <= 4)
16933 return true;
16934 }
16935 else
16936 {
16937 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16938 && GET_MODE_SIZE (mode) <= 4)
16939 return true;
16940 }
16941 }
16942
16943 /* Otherwise, use default padding. */
16944 return !BYTES_BIG_ENDIAN;
16945 }
16946
16947 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16948 assuming that the address in the base register is word aligned. */
16949 bool
16950 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16951 {
16952 HOST_WIDE_INT max_offset;
16953
16954 /* Offset must be a multiple of 4 in Thumb mode. */
16955 if (TARGET_THUMB2 && ((offset & 3) != 0))
16956 return false;
16957
16958 if (TARGET_THUMB2)
16959 max_offset = 1020;
16960 else if (TARGET_ARM)
16961 max_offset = 255;
16962 else
16963 return false;
16964
16965 return ((offset <= max_offset) && (offset >= -max_offset));
16966 }
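/* Examples of the check above: offset 1020 is accepted in Thumb-2 but not
   in ARM state (whose limit is 255); offset 2 is accepted in ARM state but
   rejected in Thumb-2 because it is not a multiple of 4.  */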
16967
16968 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16969 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16970 Assumes that the address in the base register RN is word aligned. Pattern
16971 guarantees that both memory accesses use the same base register,
16972 the offsets are constants within the range, and the gap between the offsets is 4.
16973 If reload is complete then check that the registers are legal. WBACK indicates whether
16974 address is updated. LOAD indicates whether memory access is load or store. */
16975 bool
16976 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16977 bool wback, bool load)
16978 {
16979 unsigned int t, t2, n;
16980
16981 if (!reload_completed)
16982 return true;
16983
16984 if (!offset_ok_for_ldrd_strd (offset))
16985 return false;
16986
16987 t = REGNO (rt);
16988 t2 = REGNO (rt2);
16989 n = REGNO (rn);
16990
16991 if ((TARGET_THUMB2)
16992 && ((wback && (n == t || n == t2))
16993 || (t == SP_REGNUM)
16994 || (t == PC_REGNUM)
16995 || (t2 == SP_REGNUM)
16996 || (t2 == PC_REGNUM)
16997 || (!load && (n == PC_REGNUM))
16998 || (load && (t == t2))
16999 /* Triggers Cortex-M3 LDRD errata. */
17000 || (!wback && load && fix_cm3_ldrd && (n == t))))
17001 return false;
17002
17003 if ((TARGET_ARM)
17004 && ((wback && (n == t || n == t2))
17005 || (t2 == PC_REGNUM)
17006 || (t % 2 != 0) /* First destination register is not even. */
17007 || (t2 != t + 1)
17008 /* PC can be used as base register (for offset addressing only),
17009 but it is deprecated. */
17010 || (n == PC_REGNUM)))
17011 return false;
17012
17013 return true;
17014 }
17015
17016 /* Return true if a 64-bit access with alignment ALIGN and with a
17017 constant offset OFFSET from the base pointer is permitted on this
17018 architecture. */
17019 static bool
17020 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17021 {
17022 return (unaligned_access
17023 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17024 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17025 }
17026
17027 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17028 operand MEM's address contains an immediate offset from the base
17029 register and has no side effects, in which case it sets BASE,
17030 OFFSET and ALIGN accordingly. */
17031 static bool
17032 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17033 {
17034 rtx addr;
17035
17036 gcc_assert (base != NULL && offset != NULL);
17037
17038 /* TODO: Handle more general memory operand patterns, such as
17039 PRE_DEC and PRE_INC. */
17040
17041 if (side_effects_p (mem))
17042 return false;
17043
17044 /* Can't deal with subregs. */
17045 if (SUBREG_P (mem))
17046 return false;
17047
17048 gcc_assert (MEM_P (mem));
17049
17050 *offset = const0_rtx;
17051 *align = MEM_ALIGN (mem);
17052
17053 addr = XEXP (mem, 0);
17054
17055 /* If addr isn't valid for DImode, then we can't handle it. */
17056 if (!arm_legitimate_address_p (DImode, addr,
17057 reload_in_progress || reload_completed))
17058 return false;
17059
17060 if (REG_P (addr))
17061 {
17062 *base = addr;
17063 return true;
17064 }
17065 else if (GET_CODE (addr) == PLUS)
17066 {
17067 *base = XEXP (addr, 0);
17068 *offset = XEXP (addr, 1);
17069 return (REG_P (*base) && CONST_INT_P (*offset));
17070 }
17071
17072 return false;
17073 }
17074
17075 /* Called from a peephole2 to replace two word-size accesses with a
17076 single LDRD/STRD instruction. Returns true iff we can generate a
17077 new instruction sequence. That is, both accesses use the same base
17078 register and the gap between constant offsets is 4. This function
17079 may reorder its operands to match ldrd/strd RTL templates.
17080 OPERANDS are the operands found by the peephole matcher;
17081 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17082 corresponding memory operands. LOAD indicates whether the access
17083 is load or store. CONST_STORE indicates a store of constant
17084 integer values held in OPERANDS[4,5] and assumes that the pattern
17085 is 4 insns long, for the purpose of checking dead registers.
17086 COMMUTE indicates that register operands may be reordered. */
17087 bool
17088 gen_operands_ldrd_strd (rtx *operands, bool load,
17089 bool const_store, bool commute)
17090 {
17091 int nops = 2;
17092 HOST_WIDE_INT offsets[2], offset, align[2];
17093 rtx base = NULL_RTX;
17094 rtx cur_base, cur_offset, tmp;
17095 int i, gap;
17096 HARD_REG_SET regset;
17097
17098 gcc_assert (!const_store || !load);
17099 /* Check that the memory references are immediate offsets from the
17100 same base register. Extract the base register, the destination
17101 registers, and the corresponding memory offsets. */
17102 for (i = 0; i < nops; i++)
17103 {
17104 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17105 &align[i]))
17106 return false;
17107
17108 if (i == 0)
17109 base = cur_base;
17110 else if (REGNO (base) != REGNO (cur_base))
17111 return false;
17112
17113 offsets[i] = INTVAL (cur_offset);
17114 if (GET_CODE (operands[i]) == SUBREG)
17115 {
17116 tmp = SUBREG_REG (operands[i]);
17117 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17118 operands[i] = tmp;
17119 }
17120 }
17121
17122 /* Make sure there is no dependency between the individual loads. */
17123 if (load && REGNO (operands[0]) == REGNO (base))
17124 return false; /* RAW */
17125
17126 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17127 return false; /* WAW */
17128
17129 /* If the same input register is used in both stores
17130 when storing different constants, try to find a free register.
17131 For example, the code
17132 mov r0, 0
17133 str r0, [r2]
17134 mov r0, 1
17135 str r0, [r2, #4]
17136 can be transformed into
17137 mov r1, 0
17138 mov r0, 1
17139 strd r1, r0, [r2]
17140 in Thumb mode assuming that r1 is free.
17141 For ARM mode do the same but only if the starting register
17142 can be made to be even. */
17143 if (const_store
17144 && REGNO (operands[0]) == REGNO (operands[1])
17145 && INTVAL (operands[4]) != INTVAL (operands[5]))
17146 {
17147 if (TARGET_THUMB2)
17148 {
17149 CLEAR_HARD_REG_SET (regset);
17150 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17151 if (tmp == NULL_RTX)
17152 return false;
17153
17154 /* Use the new register in the first load to ensure that
17155 if the original input register is not dead after peephole,
17156 then it will have the correct constant value. */
17157 operands[0] = tmp;
17158 }
17159 else if (TARGET_ARM)
17160 {
17161 int regno = REGNO (operands[0]);
17162 if (!peep2_reg_dead_p (4, operands[0]))
17163 {
17164 /* When the input register is even and is not dead after the
17165 pattern, it has to hold the second constant but we cannot
17166 form a legal STRD in ARM mode with this register as the second
17167 register. */
17168 if (regno % 2 == 0)
17169 return false;
17170
17171 /* Is regno-1 free? */
17172 SET_HARD_REG_SET (regset);
17173 CLEAR_HARD_REG_BIT (regset, regno - 1);
17174 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17175 if (tmp == NULL_RTX)
17176 return false;
17177
17178 operands[0] = tmp;
17179 }
17180 else
17181 {
17182 /* Find a DImode register. */
17183 CLEAR_HARD_REG_SET (regset);
17184 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17185 if (tmp != NULL_RTX)
17186 {
17187 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17188 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17189 }
17190 else
17191 {
17192 /* Can we use the input register to form a DI register? */
17193 SET_HARD_REG_SET (regset);
17194 CLEAR_HARD_REG_BIT (regset,
17195 regno % 2 == 0 ? regno + 1 : regno - 1);
17196 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17197 if (tmp == NULL_RTX)
17198 return false;
17199 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17200 }
17201 }
17202
17203 gcc_assert (operands[0] != NULL_RTX);
17204 gcc_assert (operands[1] != NULL_RTX);
17205 gcc_assert (REGNO (operands[0]) % 2 == 0);
17206 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17207 }
17208 }
17209
17210 /* Make sure the instructions are ordered with lower memory access first. */
17211 if (offsets[0] > offsets[1])
17212 {
17213 gap = offsets[0] - offsets[1];
17214 offset = offsets[1];
17215
17216 /* Swap the instructions such that lower memory is accessed first. */
17217 std::swap (operands[0], operands[1]);
17218 std::swap (operands[2], operands[3]);
17219 std::swap (align[0], align[1]);
17220 if (const_store)
17221 std::swap (operands[4], operands[5]);
17222 }
17223 else
17224 {
17225 gap = offsets[1] - offsets[0];
17226 offset = offsets[0];
17227 }
17228
17229 /* Make sure accesses are to consecutive memory locations. */
17230 if (gap != GET_MODE_SIZE (SImode))
17231 return false;
17232
17233 if (!align_ok_ldrd_strd (align[0], offset))
17234 return false;
17235
17236 /* Make sure we generate legal instructions. */
17237 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17238 false, load))
17239 return true;
17240
17241 /* In Thumb state, where registers are almost unconstrained, there
17242 is little hope of fixing it. */
17243 if (TARGET_THUMB2)
17244 return false;
17245
17246 if (load && commute)
17247 {
17248 /* Try reordering registers. */
17249 std::swap (operands[0], operands[1]);
17250 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17251 false, load))
17252 return true;
17253 }
17254
17255 if (const_store)
17256 {
17257 /* If input registers are dead after this pattern, they can be
17258 reordered or replaced by other registers that are free in the
17259 current pattern. */
17260 if (!peep2_reg_dead_p (4, operands[0])
17261 || !peep2_reg_dead_p (4, operands[1]))
17262 return false;
17263
17264 /* Try to reorder the input registers. */
17265 /* For example, the code
17266 mov r0, 0
17267 mov r1, 1
17268 str r1, [r2]
17269 str r0, [r2, #4]
17270 can be transformed into
17271 mov r1, 0
17272 mov r0, 1
17273 strd r0, [r2]
17274 */
17275 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17276 false, false))
17277 {
17278 std::swap (operands[0], operands[1]);
17279 return true;
17280 }
17281
17282 /* Try to find a free DI register. */
17283 CLEAR_HARD_REG_SET (regset);
17284 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17285 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17286 while (true)
17287 {
17288 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17289 if (tmp == NULL_RTX)
17290 return false;
17291
17292 /* DREG must be an even-numbered register in DImode.
17293 Split it into SI registers. */
17294 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17295 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17296 gcc_assert (operands[0] != NULL_RTX);
17297 gcc_assert (operands[1] != NULL_RTX);
17298 gcc_assert (REGNO (operands[0]) % 2 == 0);
17299 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17300
17301 return (operands_ok_ldrd_strd (operands[0], operands[1],
17302 base, offset,
17303 false, load));
17304 }
17305 }
17306
17307 return false;
17308 }
17309
17310
17311 /* Return true if parallel execution of the two word-size accesses provided
17312 could be satisfied with a single LDRD/STRD instruction. Two word-size
17313 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17314 register operands and OPERANDS[2,3] are the corresponding memory operands.
17315 */
17316 bool
17317 valid_operands_ldrd_strd (rtx *operands, bool load)
17318 {
17319 int nops = 2;
17320 HOST_WIDE_INT offsets[2], offset, align[2];
17321 rtx base = NULL_RTX;
17322 rtx cur_base, cur_offset;
17323 int i, gap;
17324
17325 /* Check that the memory references are immediate offsets from the
17326 same base register. Extract the base register, the destination
17327 registers, and the corresponding memory offsets. */
17328 for (i = 0; i < nops; i++)
17329 {
17330 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17331 &align[i]))
17332 return false;
17333
17334 if (i == 0)
17335 base = cur_base;
17336 else if (REGNO (base) != REGNO (cur_base))
17337 return false;
17338
17339 offsets[i] = INTVAL (cur_offset);
17340 if (GET_CODE (operands[i]) == SUBREG)
17341 return false;
17342 }
17343
17344 if (offsets[0] > offsets[1])
17345 return false;
17346
17347 gap = offsets[1] - offsets[0];
17348 offset = offsets[0];
17349
17350 /* Make sure accesses are to consecutive memory locations. */
17351 if (gap != GET_MODE_SIZE (SImode))
17352 return false;
17353
17354 if (!align_ok_ldrd_strd (align[0], offset))
17355 return false;
17356
17357 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17358 false, load);
17359 }
17360
17361 \f
17362 /* Print a symbolic form of X to the debug file, F. */
17363 static void
17364 arm_print_value (FILE *f, rtx x)
17365 {
17366 switch (GET_CODE (x))
17367 {
17368 case CONST_INT:
17369 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17370 return;
17371
17372 case CONST_DOUBLE:
17373 {
17374 char fpstr[20];
17375 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17376 sizeof (fpstr), 0, 1);
17377 fputs (fpstr, f);
17378 }
17379 return;
17380
17381 case CONST_VECTOR:
17382 {
17383 int i;
17384
17385 fprintf (f, "<");
17386 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17387 {
17388 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17389 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17390 fputc (',', f);
17391 }
17392 fprintf (f, ">");
17393 }
17394 return;
17395
17396 case CONST_STRING:
17397 fprintf (f, "\"%s\"", XSTR (x, 0));
17398 return;
17399
17400 case SYMBOL_REF:
17401 fprintf (f, "`%s'", XSTR (x, 0));
17402 return;
17403
17404 case LABEL_REF:
17405 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17406 return;
17407
17408 case CONST:
17409 arm_print_value (f, XEXP (x, 0));
17410 return;
17411
17412 case PLUS:
17413 arm_print_value (f, XEXP (x, 0));
17414 fprintf (f, "+");
17415 arm_print_value (f, XEXP (x, 1));
17416 return;
17417
17418 case PC:
17419 fprintf (f, "pc");
17420 return;
17421
17422 default:
17423 fprintf (f, "????");
17424 return;
17425 }
17426 }
17427 \f
17428 /* Routines for manipulation of the constant pool. */
17429
17430 /* Arm instructions cannot load a large constant directly into a
17431 register; they have to come from a pc relative load. The constant
17432 must therefore be placed in the addressable range of the pc
17433 relative load. Depending on the precise pc relative load
17434 instruction the range is somewhere between 256 bytes and 4k. This
17435 means that we often have to dump a constant inside a function, and
17436 generate code to branch around it.
17437
17438 It is important to minimize this, since the branches will slow
17439 things down and make the code larger.
17440
17441 Normally we can hide the table after an existing unconditional
17442 branch so that there is no interruption of the flow, but in the
17443 worst case the code looks like this:
17444
17445 ldr rn, L1
17446 ...
17447 b L2
17448 align
17449 L1: .long value
17450 L2:
17451 ...
17452
17453 ldr rn, L3
17454 ...
17455 b L4
17456 align
17457 L3: .long value
17458 L4:
17459 ...
17460
17461 We fix this by performing a scan after scheduling, which notices
17462 which instructions need to have their operands fetched from the
17463 constant table and builds the table.
17464
17465 The algorithm starts by building a table of all the constants that
17466 need fixing up and all the natural barriers in the function (places
17467 where a constant table can be dropped without breaking the flow).
17468 For each fixup we note how far the pc-relative replacement will be
17469 able to reach and the offset of the instruction into the function.
17470
17471 Having built the table we then group the fixes together to form
17472 tables that are as large as possible (subject to addressing
17473 constraints) and emit each table of constants after the last
17474 barrier that is within range of all the instructions in the group.
17475 If a group does not contain a barrier, then we forcibly create one
17476 by inserting a jump instruction into the flow. Once the table has
17477 been inserted, the insns are then modified to reference the
17478 relevant entry in the pool.
17479
17480 Possible enhancements to the algorithm (not implemented) are:
17481
17482 1) For some processors and object formats, there may be benefit in
17483 aligning the pools to the start of cache lines; this alignment
17484 would need to be taken into account when calculating addressability
17485 of a pool. */
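/* As a concrete illustration of the grouping step (the numbers are invented
   for the example): three fixups at offsets 0, 40 and 400, each with a
   4095-byte forward range, can share a single pool provided it is dropped
   after a barrier that lies beyond offset 400 but no more than 4095 bytes
   from offset 0; if no such barrier exists, an unconditional branch is
   inserted to create one.  */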
17486
17487 /* These typedefs are located at the start of this file, so that
17488 they can be used in the prototypes there. This comment is to
17489 remind readers of that fact so that the following structures
17490 can be understood more easily.
17491
17492 typedef struct minipool_node Mnode;
17493 typedef struct minipool_fixup Mfix; */
17494
17495 struct minipool_node
17496 {
17497 /* Doubly linked chain of entries. */
17498 Mnode * next;
17499 Mnode * prev;
17500 /* The maximum offset into the code at which this entry can be placed. While
17501 pushing fixes for forward references, all entries are sorted in order
17502 of increasing max_address. */
17503 HOST_WIDE_INT max_address;
17504 /* Similarly for an entry inserted for a backwards ref. */
17505 HOST_WIDE_INT min_address;
17506 /* The number of fixes referencing this entry. This can become zero
17507 if we "unpush" an entry. In this case we ignore the entry when we
17508 come to emit the code. */
17509 int refcount;
17510 /* The offset from the start of the minipool. */
17511 HOST_WIDE_INT offset;
17512 /* The value in table. */
17513 rtx value;
17514 /* The mode of value. */
17515 machine_mode mode;
17516 /* The size of the value. With iWMMXt enabled
17517 sizes > 4 also imply an alignment of 8 bytes. */
17518 int fix_size;
17519 };
17520
17521 struct minipool_fixup
17522 {
17523 Mfix * next;
17524 rtx_insn * insn;
17525 HOST_WIDE_INT address;
17526 rtx * loc;
17527 machine_mode mode;
17528 int fix_size;
17529 rtx value;
17530 Mnode * minipool;
17531 HOST_WIDE_INT forwards;
17532 HOST_WIDE_INT backwards;
17533 };
17534
17535 /* Fixes less than a word need padding out to a word boundary. */
17536 #define MINIPOOL_FIX_SIZE(mode) \
17537 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17538
17539 static Mnode * minipool_vector_head;
17540 static Mnode * minipool_vector_tail;
17541 static rtx_code_label *minipool_vector_label;
17542 static int minipool_pad;
17543
17544 /* The linked list of all minipool fixes required for this function. */
17545 Mfix * minipool_fix_head;
17546 Mfix * minipool_fix_tail;
17547 /* The fix entry for the current minipool, once it has been placed. */
17548 Mfix * minipool_barrier;
17549
17550 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17551 #define JUMP_TABLES_IN_TEXT_SECTION 0
17552 #endif
17553
17554 static HOST_WIDE_INT
17555 get_jump_table_size (rtx_jump_table_data *insn)
17556 {
17557 /* ADDR_VECs only take room if read-only data goes into the text
17558 section. */
17559 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17560 {
17561 rtx body = PATTERN (insn);
17562 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17563 HOST_WIDE_INT size;
17564 HOST_WIDE_INT modesize;
17565
17566 modesize = GET_MODE_SIZE (GET_MODE (body));
17567 size = modesize * XVECLEN (body, elt);
17568 switch (modesize)
17569 {
17570 case 1:
17571 /* Round up size of TBB table to a halfword boundary. */
17572 size = (size + 1) & ~HOST_WIDE_INT_1;
17573 break;
17574 case 2:
17575 /* No padding necessary for TBH. */
17576 break;
17577 case 4:
17578 /* Add two bytes for alignment on Thumb. */
17579 if (TARGET_THUMB)
17580 size += 2;
17581 break;
17582 default:
17583 gcc_unreachable ();
17584 }
17585 return size;
17586 }
17587
17588 return 0;
17589 }
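/* For example, assuming jump tables live in the text section: a TBB-style
   QImode ADDR_DIFF_VEC with 5 entries counts as 6 bytes (rounded up to a
   halfword), an HImode (TBH) table with 5 entries as 10 bytes, and an
   SImode table with 5 entries as 20 bytes plus 2 bytes of alignment when
   compiling for Thumb.  */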
17590
17591 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17592 function descriptor) into a register and the GOT address into the
17593 FDPIC register, returning an rtx for the register holding the
17594 function address. */
17595
17596 rtx
17597 arm_load_function_descriptor (rtx funcdesc)
17598 {
17599 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17600 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17601 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17602 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17603
17604 emit_move_insn (fnaddr_reg, fnaddr);
17605
17606 /* The ABI requires the entry point address to be loaded first, but
17607 since we cannot support lazy binding for lack of atomic load of
17608 two 32-bits values, we do not need to bother to prevent the
17609 previous load from being moved after that of the GOT address. */
17610 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17611
17612 return fnaddr_reg;
17613 }
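/* A sketch of the layout this relies on: under the FDPIC ABI a function
   descriptor is a pair of words { entry address, GOT value }, so the code
   above loads word 0 into a fresh pseudo and word 1 into the fixed FDPIC
   register (r9), roughly

     ldr  rX, [rdesc]          @ entry point
     ldr  r9, [rdesc, #4]      @ GOT/FDPIC value

   where rX and rdesc stand in for whatever registers are allocated.  */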
17614
17615 /* Return the maximum amount of padding that will be inserted before
17616 label LABEL. */
17617 static HOST_WIDE_INT
17618 get_label_padding (rtx label)
17619 {
17620 HOST_WIDE_INT align, min_insn_size;
17621
17622 align = 1 << label_to_alignment (label).levels[0].log;
17623 min_insn_size = TARGET_THUMB ? 2 : 4;
17624 return align > min_insn_size ? align - min_insn_size : 0;
17625 }
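/* E.g. a label aligned to 8 bytes can be preceded by up to 6 bytes of
   padding in Thumb (minimum insn size 2) or up to 4 bytes in ARM state
   (minimum insn size 4).  */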
17626
17627 /* Move a minipool fix MP from its current location to before MAX_MP.
17628 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17629 constraints may need updating. */
17630 static Mnode *
17631 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17632 HOST_WIDE_INT max_address)
17633 {
17634 /* The code below assumes these are different. */
17635 gcc_assert (mp != max_mp);
17636
17637 if (max_mp == NULL)
17638 {
17639 if (max_address < mp->max_address)
17640 mp->max_address = max_address;
17641 }
17642 else
17643 {
17644 if (max_address > max_mp->max_address - mp->fix_size)
17645 mp->max_address = max_mp->max_address - mp->fix_size;
17646 else
17647 mp->max_address = max_address;
17648
17649 /* Unlink MP from its current position. Since max_mp is non-null,
17650 mp->prev must be non-null. */
17651 mp->prev->next = mp->next;
17652 if (mp->next != NULL)
17653 mp->next->prev = mp->prev;
17654 else
17655 minipool_vector_tail = mp->prev;
17656
17657 /* Re-insert it before MAX_MP. */
17658 mp->next = max_mp;
17659 mp->prev = max_mp->prev;
17660 max_mp->prev = mp;
17661
17662 if (mp->prev != NULL)
17663 mp->prev->next = mp;
17664 else
17665 minipool_vector_head = mp;
17666 }
17667
17668 /* Save the new entry. */
17669 max_mp = mp;
17670
17671 /* Scan over the preceding entries and adjust their addresses as
17672 required. */
17673 while (mp->prev != NULL
17674 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17675 {
17676 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17677 mp = mp->prev;
17678 }
17679
17680 return max_mp;
17681 }
17682
17683 /* Add a constant to the minipool for a forward reference. Returns the
17684 node added or NULL if the constant will not fit in this pool. */
17685 static Mnode *
17686 add_minipool_forward_ref (Mfix *fix)
17687 {
17688 /* If set, max_mp is the first pool_entry that has a lower
17689 constraint than the one we are trying to add. */
17690 Mnode * max_mp = NULL;
17691 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17692 Mnode * mp;
17693
17694 /* If the minipool starts before the end of FIX->INSN then this FIX
17695 cannot be placed into the current pool. Furthermore, adding the
17696 new constant pool entry may cause the pool to start FIX_SIZE bytes
17697 earlier. */
17698 if (minipool_vector_head &&
17699 (fix->address + get_attr_length (fix->insn)
17700 >= minipool_vector_head->max_address - fix->fix_size))
17701 return NULL;
17702
17703 /* Scan the pool to see if a constant with the same value has
17704 already been added. While we are doing this, also note the
17705 location where we must insert the constant if it doesn't already
17706 exist. */
17707 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17708 {
17709 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17710 && fix->mode == mp->mode
17711 && (!LABEL_P (fix->value)
17712 || (CODE_LABEL_NUMBER (fix->value)
17713 == CODE_LABEL_NUMBER (mp->value)))
17714 && rtx_equal_p (fix->value, mp->value))
17715 {
17716 /* More than one fix references this entry. */
17717 mp->refcount++;
17718 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17719 }
17720
17721 /* Note the insertion point if necessary. */
17722 if (max_mp == NULL
17723 && mp->max_address > max_address)
17724 max_mp = mp;
17725
17726 /* If we are inserting an 8-byte aligned quantity and
17727 we have not already found an insertion point, then
17728 make sure that all such 8-byte aligned quantities are
17729 placed at the start of the pool. */
17730 if (ARM_DOUBLEWORD_ALIGN
17731 && max_mp == NULL
17732 && fix->fix_size >= 8
17733 && mp->fix_size < 8)
17734 {
17735 max_mp = mp;
17736 max_address = mp->max_address;
17737 }
17738 }
17739
17740 /* The value is not currently in the minipool, so we need to create
17741 a new entry for it. If MAX_MP is NULL, the entry will be put on
17742 the end of the list since the placement is less constrained than
17743 any existing entry. Otherwise, we insert the new fix before
17744 MAX_MP and, if necessary, adjust the constraints on the other
17745 entries. */
17746 mp = XNEW (Mnode);
17747 mp->fix_size = fix->fix_size;
17748 mp->mode = fix->mode;
17749 mp->value = fix->value;
17750 mp->refcount = 1;
17751 /* Not yet required for a backwards ref. */
17752 mp->min_address = -65536;
17753
17754 if (max_mp == NULL)
17755 {
17756 mp->max_address = max_address;
17757 mp->next = NULL;
17758 mp->prev = minipool_vector_tail;
17759
17760 if (mp->prev == NULL)
17761 {
17762 minipool_vector_head = mp;
17763 minipool_vector_label = gen_label_rtx ();
17764 }
17765 else
17766 mp->prev->next = mp;
17767
17768 minipool_vector_tail = mp;
17769 }
17770 else
17771 {
17772 if (max_address > max_mp->max_address - mp->fix_size)
17773 mp->max_address = max_mp->max_address - mp->fix_size;
17774 else
17775 mp->max_address = max_address;
17776
17777 mp->next = max_mp;
17778 mp->prev = max_mp->prev;
17779 max_mp->prev = mp;
17780 if (mp->prev != NULL)
17781 mp->prev->next = mp;
17782 else
17783 minipool_vector_head = mp;
17784 }
17785
17786 /* Save the new entry. */
17787 max_mp = mp;
17788
17789 /* Scan over the preceding entries and adjust their addresses as
17790 required. */
17791 while (mp->prev != NULL
17792 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17793 {
17794 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17795 mp = mp->prev;
17796 }
17797
17798 return max_mp;
17799 }
17800
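/* Like move_minipool_fix_forward_ref, but for a backward reference: move
   minipool entry MP to just after MIN_MP, or, if MIN_MP is NULL, leave it in
   place and just tighten its minimum-address constraint to MIN_ADDRESS.
   Recomputes the offsets of all pool entries and returns MP as the new
   insertion point.  */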
17801 static Mnode *
17802 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17803 HOST_WIDE_INT min_address)
17804 {
17805 HOST_WIDE_INT offset;
17806
17807 /* The code below assumes these are different. */
17808 gcc_assert (mp != min_mp);
17809
17810 if (min_mp == NULL)
17811 {
17812 if (min_address > mp->min_address)
17813 mp->min_address = min_address;
17814 }
17815 else
17816 {
17817 /* We will adjust this below if it is too loose. */
17818 mp->min_address = min_address;
17819
17820 /* Unlink MP from its current position. Since min_mp is non-null,
17821 mp->next must be non-null. */
17822 mp->next->prev = mp->prev;
17823 if (mp->prev != NULL)
17824 mp->prev->next = mp->next;
17825 else
17826 minipool_vector_head = mp->next;
17827
17828 /* Reinsert it after MIN_MP. */
17829 mp->prev = min_mp;
17830 mp->next = min_mp->next;
17831 min_mp->next = mp;
17832 if (mp->next != NULL)
17833 mp->next->prev = mp;
17834 else
17835 minipool_vector_tail = mp;
17836 }
17837
17838 min_mp = mp;
17839
17840 offset = 0;
17841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17842 {
17843 mp->offset = offset;
17844 if (mp->refcount > 0)
17845 offset += mp->fix_size;
17846
17847 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17848 mp->next->min_address = mp->min_address + mp->fix_size;
17849 }
17850
17851 return min_mp;
17852 }
17853
17854 /* Add a constant to the minipool for a backward reference. Returns the
17855 node added or NULL if the constant will not fit in this pool.
17856
17857 Note that the code for insertion for a backwards reference can be
17858 somewhat confusing because the calculated offsets for each fix do
17859 not take into account the size of the pool (which is still under
17860 construction). */
17861 static Mnode *
17862 add_minipool_backward_ref (Mfix *fix)
17863 {
17864 /* If set, min_mp is the last pool_entry that has a lower constraint
17865 than the one we are trying to add. */
17866 Mnode *min_mp = NULL;
17867 /* This can be negative, since it is only a constraint. */
17868 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17869 Mnode *mp;
17870
17871 /* If we can't reach the current pool from this insn, or if we can't
17872 insert this entry at the end of the pool without pushing other
17873 fixes out of range, then we don't try. This ensures that we
17874 can't fail later on. */
17875 if (min_address >= minipool_barrier->address
17876 || (minipool_vector_tail->min_address + fix->fix_size
17877 >= minipool_barrier->address))
17878 return NULL;
17879
17880 /* Scan the pool to see if a constant with the same value has
17881 already been added. While we are doing this, also note the
17882 location where we must insert the constant if it doesn't already
17883 exist. */
17884 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17885 {
17886 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17887 && fix->mode == mp->mode
17888 && (!LABEL_P (fix->value)
17889 || (CODE_LABEL_NUMBER (fix->value)
17890 == CODE_LABEL_NUMBER (mp->value)))
17891 && rtx_equal_p (fix->value, mp->value)
17892 /* Check that there is enough slack to move this entry to the
17893 end of the table (this is conservative). */
17894 && (mp->max_address
17895 > (minipool_barrier->address
17896 + minipool_vector_tail->offset
17897 + minipool_vector_tail->fix_size)))
17898 {
17899 mp->refcount++;
17900 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17901 }
17902
17903 if (min_mp != NULL)
17904 mp->min_address += fix->fix_size;
17905 else
17906 {
17907 /* Note the insertion point if necessary. */
17908 if (mp->min_address < min_address)
17909 {
17910 /* For now, we do not allow the insertion of 8-byte alignment
17911 requiring nodes anywhere but at the start of the pool. */
17912 if (ARM_DOUBLEWORD_ALIGN
17913 && fix->fix_size >= 8 && mp->fix_size < 8)
17914 return NULL;
17915 else
17916 min_mp = mp;
17917 }
17918 else if (mp->max_address
17919 < minipool_barrier->address + mp->offset + fix->fix_size)
17920 {
17921 /* Inserting before this entry would push the fix beyond
17922 its maximum address (which can happen if we have
17923 re-located a forwards fix); force the new fix to come
17924 after it. */
17925 if (ARM_DOUBLEWORD_ALIGN
17926 && fix->fix_size >= 8 && mp->fix_size < 8)
17927 return NULL;
17928 else
17929 {
17930 min_mp = mp;
17931 min_address = mp->min_address + fix->fix_size;
17932 }
17933 }
17934 /* Do not insert a non-8-byte aligned quantity before 8-byte
17935 aligned quantities. */
17936 else if (ARM_DOUBLEWORD_ALIGN
17937 && fix->fix_size < 8
17938 && mp->fix_size >= 8)
17939 {
17940 min_mp = mp;
17941 min_address = mp->min_address + fix->fix_size;
17942 }
17943 }
17944 }
17945
17946 /* We need to create a new entry. */
17947 mp = XNEW (Mnode);
17948 mp->fix_size = fix->fix_size;
17949 mp->mode = fix->mode;
17950 mp->value = fix->value;
17951 mp->refcount = 1;
17952 mp->max_address = minipool_barrier->address + 65536;
17953
17954 mp->min_address = min_address;
17955
17956 if (min_mp == NULL)
17957 {
17958 mp->prev = NULL;
17959 mp->next = minipool_vector_head;
17960
17961 if (mp->next == NULL)
17962 {
17963 minipool_vector_tail = mp;
17964 minipool_vector_label = gen_label_rtx ();
17965 }
17966 else
17967 mp->next->prev = mp;
17968
17969 minipool_vector_head = mp;
17970 }
17971 else
17972 {
17973 mp->next = min_mp->next;
17974 mp->prev = min_mp;
17975 min_mp->next = mp;
17976
17977 if (mp->next != NULL)
17978 mp->next->prev = mp;
17979 else
17980 minipool_vector_tail = mp;
17981 }
17982
17983 /* Save the new entry. */
17984 min_mp = mp;
17985
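/* Start the offset scan from the entry before the new one so that the new
   entry's own offset gets recomputed; if the new entry is at the head of
   the pool its offset is simply zero.  */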
17986 if (mp->prev)
17987 mp = mp->prev;
17988 else
17989 mp->offset = 0;
17990
17991 /* Scan over the following entries and adjust their offsets. */
17992 while (mp->next != NULL)
17993 {
17994 if (mp->next->min_address < mp->min_address + mp->fix_size)
17995 mp->next->min_address = mp->min_address + mp->fix_size;
17996
17997 if (mp->refcount)
17998 mp->next->offset = mp->offset + mp->fix_size;
17999 else
18000 mp->next->offset = mp->offset;
18001
18002 mp = mp->next;
18003 }
18004
18005 return min_mp;
18006 }
18007
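/* Assign an offset to every entry in the minipool that ends at BARRIER,
   skipping entries whose reference count has dropped to zero.  */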
18008 static void
18009 assign_minipool_offsets (Mfix *barrier)
18010 {
18011 HOST_WIDE_INT offset = 0;
18012 Mnode *mp;
18013
18014 minipool_barrier = barrier;
18015
18016 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18017 {
18018 mp->offset = offset;
18019
18020 if (mp->refcount > 0)
18021 offset += mp->fix_size;
18022 }
18023 }
18024
18025 /* Output the accumulated literal table (minipool) after insn SCAN. */
18026 static void
18027 dump_minipool (rtx_insn *scan)
18028 {
18029 Mnode * mp;
18030 Mnode * nmp;
18031 int align64 = 0;
18032
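/* If any entry in the pool needs 8-byte alignment, the whole pool must be
   emitted with 8-byte alignment.  */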
18033 if (ARM_DOUBLEWORD_ALIGN)
18034 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18035 if (mp->refcount > 0 && mp->fix_size >= 8)
18036 {
18037 align64 = 1;
18038 break;
18039 }
18040
18041 if (dump_file)
18042 fprintf (dump_file,
18043 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18044 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18045
18046 scan = emit_label_after (gen_label_rtx (), scan);
18047 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18048 scan = emit_label_after (minipool_vector_label, scan);
18049
18050 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18051 {
18052 if (mp->refcount > 0)
18053 {
18054 if (dump_file)
18055 {
18056 fprintf (dump_file,
18057 ";; Offset %u, min %ld, max %ld ",
18058 (unsigned) mp->offset, (unsigned long) mp->min_address,
18059 (unsigned long) mp->max_address);
18060 arm_print_value (dump_file, mp->value);
18061 fputc ('\n', dump_file);
18062 }
18063
18064 rtx val = copy_rtx (mp->value);
18065
18066 switch (GET_MODE_SIZE (mp->mode))
18067 {
18068 #ifdef HAVE_consttable_1
18069 case 1:
18070 scan = emit_insn_after (gen_consttable_1 (val), scan);
18071 break;
18072
18073 #endif
18074 #ifdef HAVE_consttable_2
18075 case 2:
18076 scan = emit_insn_after (gen_consttable_2 (val), scan);
18077 break;
18078
18079 #endif
18080 #ifdef HAVE_consttable_4
18081 case 4:
18082 scan = emit_insn_after (gen_consttable_4 (val), scan);
18083 break;
18084
18085 #endif
18086 #ifdef HAVE_consttable_8
18087 case 8:
18088 scan = emit_insn_after (gen_consttable_8 (val), scan);
18089 break;
18090
18091 #endif
18092 #ifdef HAVE_consttable_16
18093 case 16:
18094 scan = emit_insn_after (gen_consttable_16 (val), scan);
18095 break;
18096
18097 #endif
18098 default:
18099 gcc_unreachable ();
18100 }
18101 }
18102
18103 nmp = mp->next;
18104 free (mp);
18105 }
18106
18107 minipool_vector_head = minipool_vector_tail = NULL;
18108 scan = emit_insn_after (gen_consttable_end (), scan);
18109 scan = emit_barrier_after (scan);
18110 }
18111
18112 /* Return the cost of forcibly inserting a barrier after INSN. */
18113 static int
18114 arm_barrier_cost (rtx_insn *insn)
18115 {
18116 /* Basing the location of the pool on the loop depth is preferable,
18117 but at the moment, the basic block information seems to be
18118 corrupted by this stage of the compilation. */
18119 int base_cost = 50;
18120 rtx_insn *next = next_nonnote_insn (insn);
18121
18122 if (next != NULL && LABEL_P (next))
18123 base_cost -= 20;
18124
18125 switch (GET_CODE (insn))
18126 {
18127 case CODE_LABEL:
18128 /* It will always be better to place the table before the label, rather
18129 than after it. */
18130 return 50;
18131
18132 case INSN:
18133 case CALL_INSN:
18134 return base_cost;
18135
18136 case JUMP_INSN:
18137 return base_cost - 10;
18138
18139 default:
18140 return base_cost + 10;
18141 }
18142 }
18143
18144 /* Find the best place in the insn stream in the range
18145 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18146 Create the barrier by inserting a jump and add a new fix entry for
18147 it. */
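/* The new jump branches over the spot at which the literal pool will later
   be dumped, so that execution never falls into the pool data.  */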
18148 static Mfix *
18149 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18150 {
18151 HOST_WIDE_INT count = 0;
18152 rtx_barrier *barrier;
18153 rtx_insn *from = fix->insn;
18154 /* The instruction after which we will insert the jump. */
18155 rtx_insn *selected = NULL;
18156 int selected_cost;
18157 /* The address at which the jump instruction will be placed. */
18158 HOST_WIDE_INT selected_address;
18159 Mfix * new_fix;
18160 HOST_WIDE_INT max_count = max_address - fix->address;
18161 rtx_code_label *label = gen_label_rtx ();
18162
18163 selected_cost = arm_barrier_cost (from);
18164 selected_address = fix->address;
18165
18166 while (from && count < max_count)
18167 {
18168 rtx_jump_table_data *tmp;
18169 int new_cost;
18170
18171 /* This code shouldn't have been called if there was a natural barrier
18172 within range. */
18173 gcc_assert (!BARRIER_P (from));
18174
18175 /* Count the length of this insn. This must stay in sync with the
18176 code that pushes minipool fixes. */
18177 if (LABEL_P (from))
18178 count += get_label_padding (from);
18179 else
18180 count += get_attr_length (from);
18181
18182 /* If there is a jump table, add its length. */
18183 if (tablejump_p (from, NULL, &tmp))
18184 {
18185 count += get_jump_table_size (tmp);
18186
18187 /* Jump tables aren't in a basic block, so base the cost on
18188 the dispatch insn. If we select this location, we will
18189 still put the pool after the table. */
18190 new_cost = arm_barrier_cost (from);
18191
18192 if (count < max_count
18193 && (!selected || new_cost <= selected_cost))
18194 {
18195 selected = tmp;
18196 selected_cost = new_cost;
18197 selected_address = fix->address + count;
18198 }
18199
18200 /* Continue after the dispatch table. */
18201 from = NEXT_INSN (tmp);
18202 continue;
18203 }
18204
18205 new_cost = arm_barrier_cost (from);
18206
18207 if (count < max_count
18208 && (!selected || new_cost <= selected_cost))
18209 {
18210 selected = from;
18211 selected_cost = new_cost;
18212 selected_address = fix->address + count;
18213 }
18214
18215 from = NEXT_INSN (from);
18216 }
18217
18218 /* Make sure that we found a place to insert the jump. */
18219 gcc_assert (selected);
18220
18221 /* Create a new JUMP_INSN that branches around a barrier. */
18222 from = emit_jump_insn_after (gen_jump (label), selected);
18223 JUMP_LABEL (from) = label;
18224 barrier = emit_barrier_after (from);
18225 emit_label_after (label, barrier);
18226
18227 /* Create a minipool barrier entry for the new barrier. */
18228 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18229 new_fix->insn = barrier;
18230 new_fix->address = selected_address;
18231 new_fix->next = fix->next;
18232 fix->next = new_fix;
18233
18234 return new_fix;
18235 }
18236
18237 /* Record that there is a natural barrier in the insn stream at
18238 ADDRESS. */
18239 static void
18240 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18241 {
18242 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18243
18244 fix->insn = insn;
18245 fix->address = address;
18246
18247 fix->next = NULL;
18248 if (minipool_fix_head != NULL)
18249 minipool_fix_tail->next = fix;
18250 else
18251 minipool_fix_head = fix;
18252
18253 minipool_fix_tail = fix;
18254 }
18255
18256 /* Record INSN, which will need fixing up to load a value from the
18257 minipool. ADDRESS is the offset of the insn since the start of the
18258 function; LOC is a pointer to the part of the insn which requires
18259 fixing; VALUE is the constant that must be loaded, which is of type
18260 MODE. */
18261 static void
18262 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18263 machine_mode mode, rtx value)
18264 {
18265 gcc_assert (!arm_disable_literal_pool);
18266 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18267
18268 fix->insn = insn;
18269 fix->address = address;
18270 fix->loc = loc;
18271 fix->mode = mode;
18272 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18273 fix->value = value;
18274 fix->forwards = get_attr_pool_range (insn);
18275 fix->backwards = get_attr_neg_pool_range (insn);
18276 fix->minipool = NULL;
18277
18278 /* If an insn doesn't have a range defined for it, then it isn't
18279 expecting to be reworked by this code. Better to stop now than
18280 to generate duff assembly code. */
18281 gcc_assert (fix->forwards || fix->backwards);
18282
18283 /* If an entry requires 8-byte alignment then assume all constant pools
18284 require 4 bytes of padding. Trying to do this later on a per-pool
18285 basis is awkward because existing pool entries have to be modified. */
18286 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18287 minipool_pad = 4;
18288
18289 if (dump_file)
18290 {
18291 fprintf (dump_file,
18292 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18293 GET_MODE_NAME (mode),
18294 INSN_UID (insn), (unsigned long) address,
18295 -1 * (long)fix->backwards, (long)fix->forwards);
18296 arm_print_value (dump_file, fix->value);
18297 fprintf (dump_file, "\n");
18298 }
18299
18300 /* Add it to the chain of fixes. */
18301 fix->next = NULL;
18302
18303 if (minipool_fix_head != NULL)
18304 minipool_fix_tail->next = fix;
18305 else
18306 minipool_fix_head = fix;
18307
18308 minipool_fix_tail = fix;
18309 }
18310
18311 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
18312 constant inline; if synthesizing a constant would cost more than this,
18313 it is better to load it from the literal pool instead. */
18314 int
18315 arm_max_const_double_inline_cost ()
18316 {
18317 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18318 }
18319
18320 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18321 Returns the number of insns needed, or 99 if we don't know how to
18322 do it. */
18323 int
18324 arm_const_double_inline_cost (rtx val)
18325 {
18326 rtx lowpart, highpart;
18327 machine_mode mode;
18328
18329 mode = GET_MODE (val);
18330
18331 if (mode == VOIDmode)
18332 mode = DImode;
18333
18334 gcc_assert (GET_MODE_SIZE (mode) == 8);
18335
18336 lowpart = gen_lowpart (SImode, val);
18337 highpart = gen_highpart_mode (SImode, mode, val);
18338
18339 gcc_assert (CONST_INT_P (lowpart));
18340 gcc_assert (CONST_INT_P (highpart));
18341
18342 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18343 NULL_RTX, NULL_RTX, 0, 0)
18344 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18345 NULL_RTX, NULL_RTX, 0, 0));
18346 }
18347
18348 /* Cost of loading a SImode constant. */
18349 static inline int
18350 arm_const_inline_cost (enum rtx_code code, rtx val)
18351 {
18352 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18353 NULL_RTX, NULL_RTX, 1, 0);
18354 }
18355
18356 /* Return true if it is worthwhile to split a 64-bit constant into two
18357 32-bit operations. This is the case if optimizing for size, or
18358 if we have load delay slots, or if one 32-bit part can be done with
18359 a single data operation. */
18360 bool
18361 arm_const_double_by_parts (rtx val)
18362 {
18363 machine_mode mode = GET_MODE (val);
18364 rtx part;
18365
18366 if (optimize_size || arm_ld_sched)
18367 return true;
18368
18369 if (mode == VOIDmode)
18370 mode = DImode;
18371
18372 part = gen_highpart_mode (SImode, mode, val);
18373
18374 gcc_assert (CONST_INT_P (part));
18375
18376 if (const_ok_for_arm (INTVAL (part))
18377 || const_ok_for_arm (~INTVAL (part)))
18378 return true;
18379
18380 part = gen_lowpart (SImode, val);
18381
18382 gcc_assert (CONST_INT_P (part));
18383
18384 if (const_ok_for_arm (INTVAL (part))
18385 || const_ok_for_arm (~INTVAL (part)))
18386 return true;
18387
18388 return false;
18389 }
18390
18391 /* Return true if it is possible to inline both the high and low parts
18392 of a 64-bit constant into 32-bit data processing instructions. */
18393 bool
18394 arm_const_double_by_immediates (rtx val)
18395 {
18396 machine_mode mode = GET_MODE (val);
18397 rtx part;
18398
18399 if (mode == VOIDmode)
18400 mode = DImode;
18401
18402 part = gen_highpart_mode (SImode, mode, val);
18403
18404 gcc_assert (CONST_INT_P (part));
18405
18406 if (!const_ok_for_arm (INTVAL (part)))
18407 return false;
18408
18409 part = gen_lowpart (SImode, val);
18410
18411 gcc_assert (CONST_INT_P (part));
18412
18413 if (!const_ok_for_arm (INTVAL (part)))
18414 return false;
18415
18416 return true;
18417 }
18418
18419 /* Scan INSN and note any of its operands that need fixing.
18420 If DO_PUSHES is false we do not actually push any of the fixups
18421 needed. */
18422 static void
18423 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18424 {
18425 int opno;
18426
18427 extract_constrain_insn (insn);
18428
18429 if (recog_data.n_alternatives == 0)
18430 return;
18431
18432 /* Fill in recog_op_alt with information about the constraints of
18433 this insn. */
18434 preprocess_constraints (insn);
18435
18436 const operand_alternative *op_alt = which_op_alt ();
18437 for (opno = 0; opno < recog_data.n_operands; opno++)
18438 {
18439 /* Things we need to fix can only occur in inputs. */
18440 if (recog_data.operand_type[opno] != OP_IN)
18441 continue;
18442
18443 /* If this alternative is a memory reference, then any mention
18444 of constants in this alternative is really to fool reload
18445 into allowing us to accept one there. We need to fix them up
18446 now so that we output the right code. */
18447 if (op_alt[opno].memory_ok)
18448 {
18449 rtx op = recog_data.operand[opno];
18450
18451 if (CONSTANT_P (op))
18452 {
18453 if (do_pushes)
18454 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18455 recog_data.operand_mode[opno], op);
18456 }
18457 else if (MEM_P (op)
18458 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18459 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18460 {
18461 if (do_pushes)
18462 {
18463 rtx cop = avoid_constant_pool_reference (op);
18464
18465 /* Casting the address of something to a mode narrower
18466 than a word can cause avoid_constant_pool_reference()
18467 to return the pool reference itself. That's no good to
18468 us here. Let's just hope that we can use the
18469 constant pool value directly. */
18470 if (op == cop)
18471 cop = get_pool_constant (XEXP (op, 0));
18472
18473 push_minipool_fix (insn, address,
18474 recog_data.operand_loc[opno],
18475 recog_data.operand_mode[opno], cop);
18476 }
18477
18478 }
18479 }
18480 }
18481
18482 return;
18483 }
18484
18485 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18486 and unions in the context of the ARMv8-M Security Extensions. It is used
18487 as a helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18488 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18489 or four masks, depending on whether it is being computed for a
18490 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18491 respectively. The tree for the type of the argument, or of a field within
18492 an argument, is passed in ARG_TYPE. The register in which this argument or
18493 field starts is kept in *REGNO and updated accordingly. The bit at which the
18494 argument or field starts is passed in STARTING_BIT, and the last bit used so
18495 far is kept in *LAST_USED_BIT, which is also updated accordingly. */
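/* Roughly: for an argument of type  struct { char c; int i; }  passed in
   r0-r1, bits 8-31 of r0 are structure padding and end up flagged in
   padding_bits_to_clear[0], while r0 and r1 are recorded as registers that
   carry argument data and therefore must not be wiped wholesale.  */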
18496
18497 static unsigned HOST_WIDE_INT
18498 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18499 uint32_t * padding_bits_to_clear,
18500 unsigned starting_bit, int * last_used_bit)
18501
18502 {
18503 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18504
18505 if (TREE_CODE (arg_type) == RECORD_TYPE)
18506 {
18507 unsigned current_bit = starting_bit;
18508 tree field;
18509 long int offset, size;
18510
18511
18512 field = TYPE_FIELDS (arg_type);
18513 while (field)
18514 {
18515 /* The offset within a structure is always an offset from
18516 the start of that structure. Make sure we take that into account
18517 in the calculation of the register-based offset that we use here. */
18518 offset = starting_bit;
18519 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18520 offset %= 32;
18521
18522 /* This is the actual size of the field, for bitfields this is the
18523 bitfield width and not the container size. */
18524 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18525
18526 if (*last_used_bit != offset)
18527 {
18528 if (offset < *last_used_bit)
18529 {
18530 /* This field's offset is before the 'last_used_bit', which
18531 means this field goes in the next register. So we need to
18532 pad the rest of the current register and increase the
18533 register number. */
18534 uint32_t mask;
18535 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18536 mask++;
18537
18538 padding_bits_to_clear[*regno] |= mask;
18539 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18540 (*regno)++;
18541 }
18542 else
18543 {
18544 /* Otherwise we pad the bits between the last field's end and
18545 the start of the new field. */
18546 uint32_t mask;
18547
18548 mask = ((uint32_t)-1) >> (32 - offset);
18549 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18550 padding_bits_to_clear[*regno] |= mask;
18551 }
18552 current_bit = offset;
18553 }
18554
18555 /* Calculate further padding bits for inner structs/unions too. */
18556 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18557 {
18558 *last_used_bit = current_bit;
18559 not_to_clear_reg_mask
18560 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18561 padding_bits_to_clear, offset,
18562 last_used_bit);
18563 }
18564 else
18565 {
18566 /* Update 'current_bit' with this field's size. If the
18567 'current_bit' lies in a subsequent register, update 'regno' and
18568 reset 'current_bit' to point to the current bit in that new
18569 register. */
18570 current_bit += size;
18571 while (current_bit >= 32)
18572 {
18573 current_bit-=32;
18574 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18575 (*regno)++;
18576 }
18577 *last_used_bit = current_bit;
18578 }
18579
18580 field = TREE_CHAIN (field);
18581 }
18582 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18583 }
18584 else if (TREE_CODE (arg_type) == UNION_TYPE)
18585 {
18586 tree field, field_t;
18587 int i, regno_t, field_size;
18588 int max_reg = -1;
18589 int max_bit = -1;
18590 uint32_t mask;
18591 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18592 = {-1, -1, -1, -1};
18593
18594 /* To compute the padding bits in a union we only consider bits as
18595 padding bits if, for every field in the union, they are either padding
18596 bits of that field or fall outside that field's size. */
18597 field = TYPE_FIELDS (arg_type);
18598 while (field)
18599 {
18600 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18601 = {0U, 0U, 0U, 0U};
18602 int last_used_bit_t = *last_used_bit;
18603 regno_t = *regno;
18604 field_t = TREE_TYPE (field);
18605
18606 /* If the field's type is either a record or a union make sure to
18607 compute their padding bits too. */
18608 if (RECORD_OR_UNION_TYPE_P (field_t))
18609 not_to_clear_reg_mask
18610 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18611 &padding_bits_to_clear_t[0],
18612 starting_bit, &last_used_bit_t);
18613 else
18614 {
18615 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18616 regno_t = (field_size / 32) + *regno;
18617 last_used_bit_t = (starting_bit + field_size) % 32;
18618 }
18619
18620 for (i = *regno; i < regno_t; i++)
18621 {
18622 /* For all but the last register used by this field, only keep the
18623 padding bits that were padding bits in this field. */
18624 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18625 }
18626
18627 /* For the last register, keep all padding bits that were padding
18628 bits in this field and any padding bits that are still valid
18629 as padding bits but fall outside of this field's size. */
18630 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18631 padding_bits_to_clear_res[regno_t]
18632 &= padding_bits_to_clear_t[regno_t] | mask;
18633
18634 /* Update the maximum size of the fields in terms of registers used
18635 ('max_reg') and the 'last_used_bit' in said register. */
18636 if (max_reg < regno_t)
18637 {
18638 max_reg = regno_t;
18639 max_bit = last_used_bit_t;
18640 }
18641 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18642 max_bit = last_used_bit_t;
18643
18644 field = TREE_CHAIN (field);
18645 }
18646
18647 /* Update the current padding_bits_to_clear using the intersection of the
18648 padding bits of all the fields. */
18649 for (i=*regno; i < max_reg; i++)
18650 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18651
18652 /* Do not keep trailing padding bits; we do not know yet whether this
18653 is the end of the argument. */
18654 mask = ((uint32_t) 1 << max_bit) - 1;
18655 padding_bits_to_clear[max_reg]
18656 |= padding_bits_to_clear_res[max_reg] & mask;
18657
18658 *regno = max_reg;
18659 *last_used_bit = max_bit;
18660 }
18661 else
18662 /* This function should only be used for structs and unions. */
18663 gcc_unreachable ();
18664
18665 return not_to_clear_reg_mask;
18666 }
18667
18668 /* In the context of ARMv8-M Security Extensions, this function is used for both
18669 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute which
18670 registers are used when returning or passing arguments, which is then
18671 returned as a mask. It also computes a mask indicating the padding/unused
18672 bits of each of these registers, and passes this back through the
18673 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18674 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18675 the starting register used to pass this argument or return value is passed
18676 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18677 for struct and union types. */
18678
18679 static unsigned HOST_WIDE_INT
18680 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18681 uint32_t * padding_bits_to_clear)
18682
18683 {
18684 int last_used_bit = 0;
18685 unsigned HOST_WIDE_INT not_to_clear_mask;
18686
18687 if (RECORD_OR_UNION_TYPE_P (arg_type))
18688 {
18689 not_to_clear_mask
18690 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18691 padding_bits_to_clear, 0,
18692 &last_used_bit);
18693
18694
18695 /* If the 'last_used_bit' is not zero, that means we are still using a
18696 part of the last 'regno'. In such cases we must clear the trailing
18697 bits. Otherwise we are not using regno at all and should mark it
18698 for clearing in full. */
18699 if (last_used_bit != 0)
18700 padding_bits_to_clear[regno]
18701 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18702 else
18703 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18704 }
18705 else
18706 {
18707 not_to_clear_mask = 0;
18708 /* We are not dealing with structs or unions, so these arguments may be
18709 passed in floating point registers too. In some cases a BLKmode is
18710 used when returning or passing arguments in multiple VFP registers. */
18711 if (GET_MODE (arg_rtx) == BLKmode)
18712 {
18713 int i, arg_regs;
18714 rtx reg;
18715
18716 /* This should really only occur when dealing with the hard-float
18717 ABI. */
18718 gcc_assert (TARGET_HARD_FLOAT_ABI);
18719
18720 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18721 {
18722 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18723 gcc_assert (REG_P (reg));
18724
18725 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18726
18727 /* If we are dealing with DF mode, make sure we don't
18728 clear either of the registers it addresses. */
18729 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18730 if (arg_regs > 1)
18731 {
18732 unsigned HOST_WIDE_INT mask;
18733 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18734 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18735 not_to_clear_mask |= mask;
18736 }
18737 }
18738 }
18739 else
18740 {
18741 /* Otherwise we can rely on the MODE to determine how many registers
18742 are being used by this argument. */
18743 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18744 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18745 if (arg_regs > 1)
18746 {
18747 unsigned HOST_WIDE_INT
18748 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18749 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18750 not_to_clear_mask |= mask;
18751 }
18752 }
18753 }
18754
18755 return not_to_clear_mask;
18756 }
18757
18758 /* Clear registers that may hold secret values before doing a cmse_nonsecure_call
18759 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18760 which registers are to be fully cleared, using the value in register
18761 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18762 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18763 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18764
18765 NOTE: one of the three following conditions must hold:
18766 - SCRATCH_REG is a low register
18767 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18768 in TO_CLEAR_BITMAP)
18769 - CLEARING_REG is a low register. */
18770
18771 static void
18772 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18773 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18774 {
18775 bool saved_clearing = false;
18776 rtx saved_clearing_reg = NULL_RTX;
18777 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18778
18779 gcc_assert (arm_arch_cmse);
18780
18781 if (!bitmap_empty_p (to_clear_bitmap))
18782 {
18783 minregno = bitmap_first_set_bit (to_clear_bitmap);
18784 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18785 }
18786 clearing_regno = REGNO (clearing_reg);
18787
18788 /* Clear padding bits. */
18789 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18790 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18791 {
18792 uint64_t mask;
18793 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18794
18795 if (padding_bits_to_clear[i] == 0)
18796 continue;
18797
18798 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18799 CLEARING_REG as scratch. */
18800 if (TARGET_THUMB1
18801 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18802 {
18803 /* clearing_reg is not to be cleared, so copy its value into scratch_reg
18804 such that we can use clearing_reg to clear the unused bits in the
18805 arguments. */
18806 if ((clearing_regno > maxregno
18807 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18808 && !saved_clearing)
18809 {
18810 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18811 emit_move_insn (scratch_reg, clearing_reg);
18812 saved_clearing = true;
18813 saved_clearing_reg = scratch_reg;
18814 }
18815 scratch_reg = clearing_reg;
18816 }
18817
18818 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18819 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18820 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18821
18822 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18823 mask = (~padding_bits_to_clear[i]) >> 16;
18824 rtx16 = gen_int_mode (16, SImode);
18825 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18826 if (mask)
18827 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18828
18829 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18830 }
18831 if (saved_clearing)
18832 emit_move_insn (clearing_reg, saved_clearing_reg);
18833
18834
18835 /* Clear full registers. */
18836
18837 if (TARGET_HAVE_FPCXT_CMSE)
18838 {
18839 rtvec vunspec_vec;
18840 int i, j, k, nb_regs;
18841 rtx use_seq, par, reg, set, vunspec;
18842 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18843 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18844 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18845
18846 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18847 {
18848 /* Find next register to clear and exit if none. */
18849 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18850 if (i > maxregno)
18851 break;
18852
18853 /* Compute number of consecutive registers to clear. */
18854 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18855 j++);
18856 nb_regs = j - i;
18857
18858 /* Create VSCCLRM RTX pattern. */
18859 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18860 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18861 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18862 VUNSPEC_VSCCLRM_VPR);
18863 XVECEXP (par, 0, 0) = vunspec;
18864
18865 /* Insert VFP register clearing RTX in the pattern. */
18866 start_sequence ();
18867 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18868 {
18869 if (!bitmap_bit_p (to_clear_bitmap, j))
18870 continue;
18871
18872 reg = gen_rtx_REG (SFmode, j);
18873 set = gen_rtx_SET (reg, const0_rtx);
18874 XVECEXP (par, 0, k++) = set;
18875 emit_use (reg);
18876 }
18877 use_seq = get_insns ();
18878 end_sequence ();
18879
18880 emit_insn_after (use_seq, emit_insn (par));
18881 }
18882
18883 /* Get set of core registers to clear. */
18884 bitmap_clear (core_regs_bitmap);
18885 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18886 IP_REGNUM - R0_REGNUM + 1);
18887 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18888 core_regs_bitmap);
18889 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18890
18891 if (bitmap_empty_p (to_clear_core_bitmap))
18892 return;
18893
18894 /* Create clrm RTX pattern. */
18895 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18896 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18897
18898 /* Insert core register clearing RTX in the pattern. */
18899 start_sequence ();
18900 for (j = 0, i = minregno; j < nb_regs; i++)
18901 {
18902 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18903 continue;
18904
18905 reg = gen_rtx_REG (SImode, i);
18906 set = gen_rtx_SET (reg, const0_rtx);
18907 XVECEXP (par, 0, j++) = set;
18908 emit_use (reg);
18909 }
18910
18911 /* Insert APSR register clearing RTX in the pattern
18912 along with clobbering CC. */
18913 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18914 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18915 VUNSPEC_CLRM_APSR);
18916
18917 XVECEXP (par, 0, j++) = vunspec;
18918
18919 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18920 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18921 XVECEXP (par, 0, j) = clobber;
18922
18923 use_seq = get_insns ();
18924 end_sequence ();
18925
18926 emit_insn_after (use_seq, emit_insn (par));
18927 }
18928 else
18929 {
18930 /* If not marked for clearing, clearing_reg already does not contain
18931 any secret. */
18932 if (clearing_regno <= maxregno
18933 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18934 {
18935 emit_move_insn (clearing_reg, const0_rtx);
18936 emit_use (clearing_reg);
18937 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18938 }
18939
18940 for (regno = minregno; regno <= maxregno; regno++)
18941 {
18942 if (!bitmap_bit_p (to_clear_bitmap, regno))
18943 continue;
18944
18945 if (IS_VFP_REGNUM (regno))
18946 {
18947 /* If regno is an even vfp register and its successor is also to
18948 be cleared, use vmov. */
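/* Note that the registers are overwritten with 1.0 rather than 0.0:
   any constant wipes the old (potentially secret) contents, and unlike
   0.0 the value 1.0 is encodable as a VFP vmov immediate, which
   presumably motivates the choice.  */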
18949 if (TARGET_VFP_DOUBLE
18950 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18951 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18952 {
18953 emit_move_insn (gen_rtx_REG (DFmode, regno),
18954 CONST1_RTX (DFmode));
18955 emit_use (gen_rtx_REG (DFmode, regno));
18956 regno++;
18957 }
18958 else
18959 {
18960 emit_move_insn (gen_rtx_REG (SFmode, regno),
18961 CONST1_RTX (SFmode));
18962 emit_use (gen_rtx_REG (SFmode, regno));
18963 }
18964 }
18965 else
18966 {
18967 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18968 emit_use (gen_rtx_REG (SImode, regno));
18969 }
18970 }
18971 }
18972 }
18973
18974 /* Clear core and caller-saved VFP registers not used to pass arguments before
18975 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18976 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18977 libgcc/config/arm/cmse_nonsecure_call.S. */
18978
18979 static void
18980 cmse_nonsecure_call_inline_register_clear (void)
18981 {
18982 basic_block bb;
18983
18984 FOR_EACH_BB_FN (bb, cfun)
18985 {
18986 rtx_insn *insn;
18987
18988 FOR_BB_INSNS (bb, insn)
18989 {
18990 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18991 /* frame = VFP regs + FPSCR + VPR. */
18992 unsigned lazy_store_stack_frame_size
18993 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18994 unsigned long callee_saved_mask
18995 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18996 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18997 unsigned address_regnum, regno;
18998 unsigned max_int_regno
18999 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
19000 unsigned max_fp_regno
19001 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19002 unsigned maxregno
19003 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19004 auto_sbitmap to_clear_bitmap (maxregno + 1);
19005 rtx_insn *seq;
19006 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19007 rtx address;
19008 CUMULATIVE_ARGS args_so_far_v;
19009 cumulative_args_t args_so_far;
19010 tree arg_type, fntype;
19011 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19012 function_args_iterator args_iter;
19013 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19014
19015 if (!NONDEBUG_INSN_P (insn))
19016 continue;
19017
19018 if (!CALL_P (insn))
19019 continue;
19020
19021 pat = PATTERN (insn);
19022 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19023 call = XVECEXP (pat, 0, 0);
19024
19025 /* Get the real call RTX if the insn sets a value, ie. returns. */
19026 if (GET_CODE (call) == SET)
19027 call = SET_SRC (call);
19028
19029 /* Check if it is a cmse_nonsecure_call. */
19030 unspec = XEXP (call, 0);
19031 if (GET_CODE (unspec) != UNSPEC
19032 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19033 continue;
19034
19035 /* Mark registers that need to be cleared. Those that hold a
19036 parameter are removed from the set further below. */
19037 bitmap_clear (to_clear_bitmap);
19038 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19039 max_int_regno - R0_REGNUM + 1);
19040
19041 /* Only look at the caller-saved floating point registers in case of
19042 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19043 lazy store and loads which clear both caller- and callee-saved
19044 registers. */
19045 if (!lazy_fpclear)
19046 {
19047 auto_sbitmap float_bitmap (maxregno + 1);
19048
19049 bitmap_clear (float_bitmap);
19050 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19051 max_fp_regno - FIRST_VFP_REGNUM + 1);
19052 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19053 }
19054
19055 /* Make sure the register used to hold the function address is not
19056 cleared. */
19057 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19058 gcc_assert (MEM_P (address));
19059 gcc_assert (REG_P (XEXP (address, 0)));
19060 address_regnum = REGNO (XEXP (address, 0));
19061 if (address_regnum <= max_int_regno)
19062 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19063
19064 /* Set basic block of call insn so that df rescan is performed on
19065 insns inserted here. */
19066 set_block_for_insn (insn, bb);
19067 df_set_flags (DF_DEFER_INSN_RESCAN);
19068 start_sequence ();
19069
19070 /* Make sure the scheduler doesn't schedule other insns beyond
19071 here. */
19072 emit_insn (gen_blockage ());
19073
19074 /* Walk through all arguments and clear registers
19075 appropriately. */
19076 fntype = TREE_TYPE (MEM_EXPR (address));
19077 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19078 NULL_TREE);
19079 args_so_far = pack_cumulative_args (&args_so_far_v);
19080 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19081 {
19082 rtx arg_rtx;
19083 uint64_t to_clear_args_mask;
19084
19085 if (VOID_TYPE_P (arg_type))
19086 continue;
19087
19088 function_arg_info arg (arg_type, /*named=*/true);
19089 if (!first_param)
19090 /* ??? We should advance after processing the argument and pass
19091 the argument we're advancing past. */
19092 arm_function_arg_advance (args_so_far, arg);
19093
19094 arg_rtx = arm_function_arg (args_so_far, arg);
19095 gcc_assert (REG_P (arg_rtx));
19096 to_clear_args_mask
19097 = compute_not_to_clear_mask (arg_type, arg_rtx,
19098 REGNO (arg_rtx),
19099 &padding_bits_to_clear[0]);
19100 if (to_clear_args_mask)
19101 {
19102 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19103 {
19104 if (to_clear_args_mask & (1ULL << regno))
19105 bitmap_clear_bit (to_clear_bitmap, regno);
19106 }
19107 }
19108
19109 first_param = false;
19110 }
19111
19112 /* We use right shift and left shift to clear the LSB of the address
19113 we jump to instead of using bic, to avoid having to use an extra
19114 register on Thumb-1. */
19115 clearing_reg = XEXP (address, 0);
19116 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19117 emit_insn (gen_rtx_SET (clearing_reg, shift));
19118 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19119 emit_insn (gen_rtx_SET (clearing_reg, shift));
19120
19121 if (clear_callee_saved)
19122 {
19123 rtx push_insn =
19124 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19125 /* Disable frame debug info in push because it needs to be
19126 disabled for pop (see below). */
19127 RTX_FRAME_RELATED_P (push_insn) = 0;
19128
19129 /* Lazy store multiple. */
19130 if (lazy_fpclear)
19131 {
19132 rtx imm;
19133 rtx_insn *add_insn;
19134
19135 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19136 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19137 stack_pointer_rtx, imm));
19138 /* If we have the frame pointer, then it will be the
19139 CFA reg. Otherwise, the stack pointer is the CFA
19140 reg, so we need to emit a CFA adjust. */
19141 if (!frame_pointer_needed)
19142 arm_add_cfa_adjust_cfa_note (add_insn,
19143 - lazy_store_stack_frame_size,
19144 stack_pointer_rtx,
19145 stack_pointer_rtx);
19146 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19147 }
19148 /* Save VFP callee-saved registers. */
19149 else
19150 {
19151 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19152 (max_fp_regno - D7_VFP_REGNUM) / 2);
19153 /* Disable frame debug info in push because it needs to be
19154 disabled for vpop (see below). */
19155 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19156 }
19157 }
19158
19159 /* Clear caller-saved registers that leak before doing a non-secure
19160 call. */
19161 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19162 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19163 NUM_ARG_REGS, ip_reg, clearing_reg);
19164
19165 seq = get_insns ();
19166 end_sequence ();
19167 emit_insn_before (seq, insn);
19168
19169 if (TARGET_HAVE_FPCXT_CMSE)
19170 {
19171 rtx_insn *last, *pop_insn, *after = insn;
19172
19173 start_sequence ();
19174
19175 /* Lazy load multiple done as part of libcall in Armv8-M. */
19176 if (lazy_fpclear)
19177 {
19178 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19179 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19180 rtx_insn *add_insn =
19181 emit_insn (gen_addsi3 (stack_pointer_rtx,
19182 stack_pointer_rtx, imm));
19183 if (!frame_pointer_needed)
19184 arm_add_cfa_adjust_cfa_note (add_insn,
19185 lazy_store_stack_frame_size,
19186 stack_pointer_rtx,
19187 stack_pointer_rtx);
19188 }
19189 /* Restore VFP callee-saved registers. */
19190 else
19191 {
19192 int nb_callee_saved_vfp_regs =
19193 (max_fp_regno - D7_VFP_REGNUM) / 2;
19194 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19195 nb_callee_saved_vfp_regs,
19196 stack_pointer_rtx);
19197 /* Disable frame debug info in vpop because the SP adjustment
19198 is made using a CFA adjustment note while CFA used is
19199 sometimes R7. This then causes an assert failure in the
19200 CFI note creation code. */
19201 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19202 }
19203
19204 arm_emit_multi_reg_pop (callee_saved_mask);
19205 pop_insn = get_last_insn ();
19206
19207 /* Disable frame debug info in pop because the notes reset the state
19208 of popped registers to what it was at the beginning of the
19209 function, before the prologue. This leads to incorrect state
19210 when doing the pop after the nonsecure call for registers that
19211 are pushed both in the prologue and before the nonsecure call.
19212
19213 It also occasionally triggers an assert failure in CFI note
19214 creation code when there are two codepaths to the epilogue,
19215 one of which does not go through the nonsecure call.
19216 Obviously this means that debugging between the push and pop is
19217 not reliable. */
19218 RTX_FRAME_RELATED_P (pop_insn) = 0;
19219
19220 seq = get_insns ();
19221 last = get_last_insn ();
19222 end_sequence ();
19223
19224 emit_insn_after (seq, after);
19225
19226 /* Skip pop we have just inserted after nonsecure call, we know
19227 it does not contain a nonsecure call. */
19228 insn = last;
19229 }
19230 }
19231 }
19232 }
19233
19234 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19235 be useful in the next conditional jump insn. */
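/* For example (Thumb-1):

        movs    r1, r0                 subs    r1, r0, #0
        ...                   -->      ...
        cmp     r1, #0                 bne     .L1
        bne     .L1

   SUBS with #0 copies the value while also setting the N and Z flags,
   so the explicit compare against zero can typically be omitted when
   the branch is output.  */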
19236
19237 static void
19238 thumb1_reorg (void)
19239 {
19240 basic_block bb;
19241
19242 FOR_EACH_BB_FN (bb, cfun)
19243 {
19244 rtx dest, src;
19245 rtx cmp, op0, op1, set = NULL;
19246 rtx_insn *prev, *insn = BB_END (bb);
19247 bool insn_clobbered = false;
19248
19249 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19250 insn = PREV_INSN (insn);
19251
19252 /* Find the last cbranchsi4_insn in basic block BB. */
19253 if (insn == BB_HEAD (bb)
19254 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19255 continue;
19256
19257 /* Get the register with which we are comparing. */
19258 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19259 op0 = XEXP (cmp, 0);
19260 op1 = XEXP (cmp, 1);
19261
19262 /* Check that comparison is against ZERO. */
19263 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19264 continue;
19265
19266 /* Find the first flag setting insn before INSN in basic block BB. */
19267 gcc_assert (insn != BB_HEAD (bb));
19268 for (prev = PREV_INSN (insn);
19269 (!insn_clobbered
19270 && prev != BB_HEAD (bb)
19271 && (NOTE_P (prev)
19272 || DEBUG_INSN_P (prev)
19273 || ((set = single_set (prev)) != NULL
19274 && get_attr_conds (prev) == CONDS_NOCOND)));
19275 prev = PREV_INSN (prev))
19276 {
19277 if (reg_set_p (op0, prev))
19278 insn_clobbered = true;
19279 }
19280
19281 /* Skip if op0 is clobbered by insn other than prev. */
19282 if (insn_clobbered)
19283 continue;
19284
19285 if (!set)
19286 continue;
19287
19288 dest = SET_DEST (set);
19289 src = SET_SRC (set);
19290 if (!low_register_operand (dest, SImode)
19291 || !low_register_operand (src, SImode))
19292 continue;
19293
19294 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19295 in INSN. Both src and dest of the move insn are checked. */
19296 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19297 {
19298 dest = copy_rtx (dest);
19299 src = copy_rtx (src);
19300 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19301 PATTERN (prev) = gen_rtx_SET (dest, src);
19302 INSN_CODE (prev) = -1;
19303 /* Set test register in INSN to dest. */
19304 XEXP (cmp, 0) = copy_rtx (dest);
19305 INSN_CODE (insn) = -1;
19306 }
19307 }
19308 }
19309
19310 /* Convert instructions to their cc-clobbering variant if possible, since
19311 that allows us to use smaller encodings. */
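/* For instance, a 32-bit Thumb-2 "add r0, r0, r1" can become the 16-bit
   flag-setting "adds r0, r0, r1" once the condition codes are known to be
   dead at that point.  */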
19312
19313 static void
19314 thumb2_reorg (void)
19315 {
19316 basic_block bb;
19317 regset_head live;
19318
19319 INIT_REG_SET (&live);
19320
19321 /* We are freeing block_for_insn in the toplev to keep compatibility
19322 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19323 compute_bb_for_insn ();
19324 df_analyze ();
19325
19326 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19327
19328 FOR_EACH_BB_FN (bb, cfun)
19329 {
19330 if ((current_tune->disparage_flag_setting_t16_encodings
19331 == tune_params::DISPARAGE_FLAGS_ALL)
19332 && optimize_bb_for_speed_p (bb))
19333 continue;
19334
19335 rtx_insn *insn;
19336 Convert_Action action = SKIP;
19337 Convert_Action action_for_partial_flag_setting
19338 = ((current_tune->disparage_flag_setting_t16_encodings
19339 != tune_params::DISPARAGE_FLAGS_NEITHER)
19340 && optimize_bb_for_speed_p (bb))
19341 ? SKIP : CONV;
19342
19343 COPY_REG_SET (&live, DF_LR_OUT (bb));
19344 df_simulate_initialize_backwards (bb, &live);
19345 FOR_BB_INSNS_REVERSE (bb, insn)
19346 {
19347 if (NONJUMP_INSN_P (insn)
19348 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19349 && GET_CODE (PATTERN (insn)) == SET)
19350 {
19351 action = SKIP;
19352 rtx pat = PATTERN (insn);
19353 rtx dst = XEXP (pat, 0);
19354 rtx src = XEXP (pat, 1);
19355 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19356
19357 if (UNARY_P (src) || BINARY_P (src))
19358 op0 = XEXP (src, 0);
19359
19360 if (BINARY_P (src))
19361 op1 = XEXP (src, 1);
19362
19363 if (low_register_operand (dst, SImode))
19364 {
19365 switch (GET_CODE (src))
19366 {
19367 case PLUS:
19368 /* Adding two registers and storing the result
19369 in the first source is already a 16-bit
19370 operation. */
19371 if (rtx_equal_p (dst, op0)
19372 && register_operand (op1, SImode))
19373 break;
19374
19375 if (low_register_operand (op0, SImode))
19376 {
19377 /* ADDS <Rd>,<Rn>,<Rm> */
19378 if (low_register_operand (op1, SImode))
19379 action = CONV;
19380 /* ADDS <Rdn>,#<imm8> */
19381 /* SUBS <Rdn>,#<imm8> */
19382 else if (rtx_equal_p (dst, op0)
19383 && CONST_INT_P (op1)
19384 && IN_RANGE (INTVAL (op1), -255, 255))
19385 action = CONV;
19386 /* ADDS <Rd>,<Rn>,#<imm3> */
19387 /* SUBS <Rd>,<Rn>,#<imm3> */
19388 else if (CONST_INT_P (op1)
19389 && IN_RANGE (INTVAL (op1), -7, 7))
19390 action = CONV;
19391 }
19392 /* ADCS <Rd>, <Rn> */
19393 else if (GET_CODE (XEXP (src, 0)) == PLUS
19394 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19395 && low_register_operand (XEXP (XEXP (src, 0), 1),
19396 SImode)
19397 && COMPARISON_P (op1)
19398 && cc_register (XEXP (op1, 0), VOIDmode)
19399 && maybe_get_arm_condition_code (op1) == ARM_CS
19400 && XEXP (op1, 1) == const0_rtx)
19401 action = CONV;
19402 break;
19403
19404 case MINUS:
19405 /* RSBS <Rd>,<Rn>,#0
19406 Not handled here: see NEG below. */
19407 /* SUBS <Rd>,<Rn>,#<imm3>
19408 SUBS <Rdn>,#<imm8>
19409 Not handled here: see PLUS above. */
19410 /* SUBS <Rd>,<Rn>,<Rm> */
19411 if (low_register_operand (op0, SImode)
19412 && low_register_operand (op1, SImode))
19413 action = CONV;
19414 break;
19415
19416 case MULT:
19417 /* MULS <Rdm>,<Rn>,<Rdm>
19418 As an exception to the rule, this is only used
19419 when optimizing for size since MULS is slow on all
19420 known implementations. We do not even want to use
19421 MULS in cold code, if optimizing for speed, so we
19422 test the global flag here. */
19423 if (!optimize_size)
19424 break;
19425 /* Fall through. */
19426 case AND:
19427 case IOR:
19428 case XOR:
19429 /* ANDS <Rdn>,<Rm> */
19430 if (rtx_equal_p (dst, op0)
19431 && low_register_operand (op1, SImode))
19432 action = action_for_partial_flag_setting;
19433 else if (rtx_equal_p (dst, op1)
19434 && low_register_operand (op0, SImode))
19435 action = action_for_partial_flag_setting == SKIP
19436 ? SKIP : SWAP_CONV;
19437 break;
19438
19439 case ASHIFTRT:
19440 case ASHIFT:
19441 case LSHIFTRT:
19442 /* ASRS <Rdn>,<Rm> */
19443 /* LSRS <Rdn>,<Rm> */
19444 /* LSLS <Rdn>,<Rm> */
19445 if (rtx_equal_p (dst, op0)
19446 && low_register_operand (op1, SImode))
19447 action = action_for_partial_flag_setting;
19448 /* ASRS <Rd>,<Rm>,#<imm5> */
19449 /* LSRS <Rd>,<Rm>,#<imm5> */
19450 /* LSLS <Rd>,<Rm>,#<imm5> */
19451 else if (low_register_operand (op0, SImode)
19452 && CONST_INT_P (op1)
19453 && IN_RANGE (INTVAL (op1), 0, 31))
19454 action = action_for_partial_flag_setting;
19455 break;
19456
19457 case ROTATERT:
19458 /* RORS <Rdn>,<Rm> */
19459 if (rtx_equal_p (dst, op0)
19460 && low_register_operand (op1, SImode))
19461 action = action_for_partial_flag_setting;
19462 break;
19463
19464 case NOT:
19465 /* MVNS <Rd>,<Rm> */
19466 if (low_register_operand (op0, SImode))
19467 action = action_for_partial_flag_setting;
19468 break;
19469
19470 case NEG:
19471 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19472 if (low_register_operand (op0, SImode))
19473 action = CONV;
19474 break;
19475
19476 case CONST_INT:
19477 /* MOVS <Rd>,#<imm8> */
19478 if (CONST_INT_P (src)
19479 && IN_RANGE (INTVAL (src), 0, 255))
19480 action = action_for_partial_flag_setting;
19481 break;
19482
19483 case REG:
19484 /* MOVS and MOV<c> with registers have different
19485 encodings, so are not relevant here. */
19486 break;
19487
19488 default:
19489 break;
19490 }
19491 }
19492
19493 if (action != SKIP)
19494 {
19495 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19496 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19497 rtvec vec;
19498
19499 if (action == SWAP_CONV)
19500 {
19501 src = copy_rtx (src);
19502 XEXP (src, 0) = op1;
19503 XEXP (src, 1) = op0;
19504 pat = gen_rtx_SET (dst, src);
19505 vec = gen_rtvec (2, pat, clobber);
19506 }
19507 else /* action == CONV */
19508 vec = gen_rtvec (2, pat, clobber);
19509
19510 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19511 INSN_CODE (insn) = -1;
19512 }
19513 }
19514
19515 if (NONDEBUG_INSN_P (insn))
19516 df_simulate_one_insn_backwards (bb, insn, &live);
19517 }
19518 }
19519
19520 CLEAR_REG_SET (&live);
19521 }
19522
19523 /* GCC puts the pool in the wrong place for ARM, since we can only
19524 load addresses a limited distance around the pc. We do some
19525 special munging to move the constant pool values to the correct
19526 point in the code. */
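/* For example, an ARM-state literal load such as

        ldr     r0, .Lpool_entry

   only encodes a 12-bit PC-relative offset (roughly +/- 4KB), so each
   pool entry must be placed within that distance of every insn that
   references it -- typically just after an existing barrier, or behind
   a branch inserted specifically to jump around the pool.  */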
19527 static void
19528 arm_reorg (void)
19529 {
19530 rtx_insn *insn;
19531 HOST_WIDE_INT address = 0;
19532 Mfix * fix;
19533
19534 if (use_cmse)
19535 cmse_nonsecure_call_inline_register_clear ();
19536
19537 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19538 if (cfun->is_thunk)
19539 ;
19540 else if (TARGET_THUMB1)
19541 thumb1_reorg ();
19542 else if (TARGET_THUMB2)
19543 thumb2_reorg ();
19544
19545 /* Ensure all insns that must be split have been split at this point.
19546 Otherwise, the pool placement code below may compute incorrect
19547 insn lengths. Note that when optimizing, all insns have already
19548 been split at this point. */
19549 if (!optimize)
19550 split_all_insns_noflow ();
19551
19552 /* Make sure we do not attempt to create a literal pool even though it should
19553 no longer be necessary to create any. */
19554 if (arm_disable_literal_pool)
19555 return;
19556
19557 minipool_fix_head = minipool_fix_tail = NULL;
19558
19559 /* The first insn must always be a note, or the code below won't
19560 scan it properly. */
19561 insn = get_insns ();
19562 gcc_assert (NOTE_P (insn));
19563 minipool_pad = 0;
19564
19565 /* Scan all the insns and record the operands that will need fixing. */
19566 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19567 {
19568 if (BARRIER_P (insn))
19569 push_minipool_barrier (insn, address);
19570 else if (INSN_P (insn))
19571 {
19572 rtx_jump_table_data *table;
19573
19574 note_invalid_constants (insn, address, true);
19575 address += get_attr_length (insn);
19576
19577 /* If the insn is a vector jump, add the size of the table
19578 and skip the table. */
19579 if (tablejump_p (insn, NULL, &table))
19580 {
19581 address += get_jump_table_size (table);
19582 insn = table;
19583 }
19584 }
19585 else if (LABEL_P (insn))
19586 /* Add the worst-case padding due to alignment. We don't add
19587 the _current_ padding because the minipool insertions
19588 themselves might change it. */
19589 address += get_label_padding (insn);
19590 }
19591
19592 fix = minipool_fix_head;
19593
19594 /* Now scan the fixups and perform the required changes. */
19595 while (fix)
19596 {
19597 Mfix * ftmp;
19598 Mfix * fdel;
19599 Mfix * last_added_fix;
19600 Mfix * last_barrier = NULL;
19601 Mfix * this_fix;
19602
19603 /* Skip any further barriers before the next fix. */
19604 while (fix && BARRIER_P (fix->insn))
19605 fix = fix->next;
19606
19607 /* No more fixes. */
19608 if (fix == NULL)
19609 break;
19610
19611 last_added_fix = NULL;
19612
19613 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19614 {
19615 if (BARRIER_P (ftmp->insn))
19616 {
19617 if (ftmp->address >= minipool_vector_head->max_address)
19618 break;
19619
19620 last_barrier = ftmp;
19621 }
19622 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19623 break;
19624
19625 last_added_fix = ftmp; /* Keep track of the last fix added. */
19626 }
19627
19628 /* If we found a barrier, drop back to that; any fixes that we
19629 could have reached but come after the barrier will now go in
19630 the next mini-pool. */
19631 if (last_barrier != NULL)
19632 {
19633 /* Reduce the refcount for those fixes that won't go into this
19634 pool after all. */
19635 for (fdel = last_barrier->next;
19636 fdel && fdel != ftmp;
19637 fdel = fdel->next)
19638 {
19639 fdel->minipool->refcount--;
19640 fdel->minipool = NULL;
19641 }
19642
19643 ftmp = last_barrier;
19644 }
19645 else
19646 {
19647 /* ftmp is the first fix that we can't fit into this pool and
19648 there are no natural barriers that we could use. Insert a
19649 new barrier in the code somewhere between the previous
19650 fix and this one, and arrange to jump around it. */
19651 HOST_WIDE_INT max_address;
19652
19653 /* The last item on the list of fixes must be a barrier, so
19654 we can never run off the end of the list of fixes without
19655 last_barrier being set. */
19656 gcc_assert (ftmp);
19657
19658 max_address = minipool_vector_head->max_address;
19659 /* Check that there isn't another fix that is in range that
19660 we couldn't fit into this pool because the pool was
19661 already too large: we need to put the pool before such an
19662 instruction. The pool itself may come just after the
19663 fix because create_fix_barrier also allows space for a
19664 jump instruction. */
19665 if (ftmp->address < max_address)
19666 max_address = ftmp->address + 1;
19667
19668 last_barrier = create_fix_barrier (last_added_fix, max_address);
19669 }
19670
19671 assign_minipool_offsets (last_barrier);
19672
19673 while (ftmp)
19674 {
19675 if (!BARRIER_P (ftmp->insn)
19676 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19677 == NULL))
19678 break;
19679
19680 ftmp = ftmp->next;
19681 }
19682
19683 /* Scan over the fixes we have identified for this pool, fixing them
19684 up and adding the constants to the pool itself. */
19685 for (this_fix = fix; this_fix && ftmp != this_fix;
19686 this_fix = this_fix->next)
19687 if (!BARRIER_P (this_fix->insn))
19688 {
19689 rtx addr
19690 = plus_constant (Pmode,
19691 gen_rtx_LABEL_REF (VOIDmode,
19692 minipool_vector_label),
19693 this_fix->minipool->offset);
19694 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19695 }
19696
19697 dump_minipool (last_barrier->insn);
19698 fix = ftmp;
19699 }
19700
19701 /* From now on we must synthesize any constants that we can't handle
19702 directly. This can happen if the RTL gets split during final
19703 instruction generation. */
19704 cfun->machine->after_arm_reorg = 1;
19705
19706 /* Free the minipool memory. */
19707 obstack_free (&minipool_obstack, minipool_startobj);
19708 }
19709 \f
19710 /* Routines to output assembly language. */
19711
19712 /* Return string representation of passed in real value. */
19713 static const char *
19714 fp_const_from_val (REAL_VALUE_TYPE *r)
19715 {
19716 if (!fp_consts_inited)
19717 init_fp_table ();
19718
19719 gcc_assert (real_equal (r, &value_fp0));
19720 return "0";
19721 }
19722
19723 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19724 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19725 insn is in the list, and UPDATE is true iff the list contains an
19726 explicit update of the base register. */
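/* For example, restoring r4, r5 and the return address with a base
   update on SP is emitted as

        pop     {r4, r5, pc}

   while the same restore through another base register without
   writeback uses the LDM form, e.g.

        ldm     r3, {r4, r5, pc}  */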
19727 void
19728 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19729 bool update)
19730 {
19731 int i;
19732 char pattern[100];
19733 int offset;
19734 const char *conditional;
19735 int num_saves = XVECLEN (operands[0], 0);
19736 unsigned int regno;
19737 unsigned int regno_base = REGNO (operands[1]);
19738 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19739
19740 offset = 0;
19741 offset += update ? 1 : 0;
19742 offset += return_pc ? 1 : 0;
19743
19744 /* Is the base register in the list? */
19745 for (i = offset; i < num_saves; i++)
19746 {
19747 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19748 /* If SP is in the list, then the base register must be SP. */
19749 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19750 /* If base register is in the list, there must be no explicit update. */
19751 if (regno == regno_base)
19752 gcc_assert (!update);
19753 }
19754
19755 conditional = reverse ? "%?%D0" : "%?%d0";
19756 /* Can't use POP if returning from an interrupt. */
19757 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19758 sprintf (pattern, "pop%s\t{", conditional);
19759 else
19760 {
19761 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19762 It's just a convention; their semantics are identical. */
19763 if (regno_base == SP_REGNUM)
19764 sprintf (pattern, "ldmfd%s\t", conditional);
19765 else if (update)
19766 sprintf (pattern, "ldmia%s\t", conditional);
19767 else
19768 sprintf (pattern, "ldm%s\t", conditional);
19769
19770 strcat (pattern, reg_names[regno_base]);
19771 if (update)
19772 strcat (pattern, "!, {");
19773 else
19774 strcat (pattern, ", {");
19775 }
19776
19777 /* Output the first destination register. */
19778 strcat (pattern,
19779 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19780
19781 /* Output the rest of the destination registers. */
19782 for (i = offset + 1; i < num_saves; i++)
19783 {
19784 strcat (pattern, ", ");
19785 strcat (pattern,
19786 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19787 }
19788
19789 strcat (pattern, "}");
19790
19791 if (interrupt_p && return_pc)
19792 strcat (pattern, "^");
19793
19794 output_asm_insn (pattern, &cond);
19795 }
19796
19797
19798 /* Output the assembly for a store multiple. */
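/* For example, storing three D registers starting at d8 is emitted as

        vpush.64        {d8, d9, d10}

   when the address register is SP, and as

        vstmdb.64       r4!, {d8, d9, d10}

   (with r4 standing in for the actual base register) otherwise.  */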
19799
19800 const char *
19801 vfp_output_vstmd (rtx * operands)
19802 {
19803 char pattern[100];
19804 int p;
19805 int base;
19806 int i;
19807 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19808 ? XEXP (operands[0], 0)
19809 : XEXP (XEXP (operands[0], 0), 0);
19810 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19811
19812 if (push_p)
19813 strcpy (pattern, "vpush%?.64\t{%P1");
19814 else
19815 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19816
19817 p = strlen (pattern);
19818
19819 gcc_assert (REG_P (operands[1]));
19820
19821 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19822 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19823 {
19824 p += sprintf (&pattern[p], ", d%d", base + i);
19825 }
19826 strcpy (&pattern[p], "}");
19827
19828 output_asm_insn (pattern, operands);
19829 return "";
19830 }
19831
19832
19833 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19834 number of bytes pushed. */
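/* For example, a call with BASE_REG corresponding to d8 and COUNT == 3
   emits one store-multiple that drops SP by 24 bytes, records the three
   DFmode slot stores in the unwind information, and returns 24.  On
   cores without arm_arch6 the VFPr1 workaround below silently widens a
   request for exactly two pairs into three.  */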
19835
19836 static int
19837 vfp_emit_fstmd (int base_reg, int count)
19838 {
19839 rtx par;
19840 rtx dwarf;
19841 rtx tmp, reg;
19842 int i;
19843
19844 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19845 register pairs are stored by a store multiple insn. We avoid this
19846 by pushing an extra pair. */
19847 if (count == 2 && !arm_arch6)
19848 {
19849 if (base_reg == LAST_VFP_REGNUM - 3)
19850 base_reg -= 2;
19851 count++;
19852 }
19853
19854 /* FSTMD may not store more than 16 doubleword registers at once. Split
19855 larger stores into multiple parts (up to a maximum of two, in
19856 practice). */
19857 if (count > 16)
19858 {
19859 int saved;
19860 /* NOTE: base_reg is an internal register number, so each D register
19861 counts as 2. */
19862 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19863 saved += vfp_emit_fstmd (base_reg, 16);
19864 return saved;
19865 }
19866
19867 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19868 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19869
19870 reg = gen_rtx_REG (DFmode, base_reg);
19871 base_reg += 2;
19872
19873 XVECEXP (par, 0, 0)
19874 = gen_rtx_SET (gen_frame_mem
19875 (BLKmode,
19876 gen_rtx_PRE_MODIFY (Pmode,
19877 stack_pointer_rtx,
19878 plus_constant
19879 (Pmode, stack_pointer_rtx,
19880 - (count * 8)))
19881 ),
19882 gen_rtx_UNSPEC (BLKmode,
19883 gen_rtvec (1, reg),
19884 UNSPEC_PUSH_MULT));
19885
19886 tmp = gen_rtx_SET (stack_pointer_rtx,
19887 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19888 RTX_FRAME_RELATED_P (tmp) = 1;
19889 XVECEXP (dwarf, 0, 0) = tmp;
19890
19891 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19892 RTX_FRAME_RELATED_P (tmp) = 1;
19893 XVECEXP (dwarf, 0, 1) = tmp;
19894
19895 for (i = 1; i < count; i++)
19896 {
19897 reg = gen_rtx_REG (DFmode, base_reg);
19898 base_reg += 2;
19899 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19900
19901 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19902 plus_constant (Pmode,
19903 stack_pointer_rtx,
19904 i * 8)),
19905 reg);
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 XVECEXP (dwarf, 0, i + 1) = tmp;
19908 }
19909
19910 par = emit_insn (par);
19911 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19912 RTX_FRAME_RELATED_P (par) = 1;
19913
19914 return count * 8;
19915 }
19916
19917 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19918 has the cmse_nonsecure_call attribute; returns false otherwise. */
19919
19920 bool
19921 detect_cmse_nonsecure_call (tree addr)
19922 {
19923 if (!addr)
19924 return FALSE;
19925
19926 tree fntype = TREE_TYPE (addr);
19927 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19928 TYPE_ATTRIBUTES (fntype)))
19929 return TRUE;
19930 return FALSE;
19931 }
19932
19933
19934 /* Emit a call instruction with pattern PAT. ADDR is the address of
19935 the call target. */
19936
19937 void
19938 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19939 {
19940 rtx insn;
19941
19942 insn = emit_call_insn (pat);
19943
19944 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19945 If the call might use such an entry, add a use of the PIC register
19946 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19947 if (TARGET_VXWORKS_RTP
19948 && flag_pic
19949 && !sibcall
19950 && SYMBOL_REF_P (addr)
19951 && (SYMBOL_REF_DECL (addr)
19952 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19953 : !SYMBOL_REF_LOCAL_P (addr)))
19954 {
19955 require_pic_register (NULL_RTX, false /*compute_now*/);
19956 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19957 }
19958
19959 if (TARGET_FDPIC)
19960 {
19961 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19962 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19963 }
19964
19965 if (TARGET_AAPCS_BASED)
19966 {
19967 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19968 linker. We need to add an IP clobber to allow setting
19969 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19970 is not needed since it's a fixed register. */
19971 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19972 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19973 }
19974 }
19975
19976 /* Output a 'call' insn. */
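/* The emitted sequence saves the return address by hand and then
   branches; e.g. for a call through r3 on an interworking-capable
   target:

        mov     lr, pc
        bx      r3

   with "mov pc, r3" replacing the BX on older, non-interworking
   targets.  */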
19977 const char *
19978 output_call (rtx *operands)
19979 {
19980 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19981
19982 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19983 if (REGNO (operands[0]) == LR_REGNUM)
19984 {
19985 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19986 output_asm_insn ("mov%?\t%0, %|lr", operands);
19987 }
19988
19989 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19990
19991 if (TARGET_INTERWORK || arm_arch4t)
19992 output_asm_insn ("bx%?\t%0", operands);
19993 else
19994 output_asm_insn ("mov%?\t%|pc, %0", operands);
19995
19996 return "";
19997 }
19998
19999 /* Output a move from ARM registers to ARM registers of a long double.
20000 OPERANDS[0] is the destination.
20001 OPERANDS[1] is the source. */
20002 const char *
20003 output_mov_long_double_arm_from_arm (rtx *operands)
20004 {
20005 /* We have to be careful here because the two might overlap. */
20006 int dest_start = REGNO (operands[0]);
20007 int src_start = REGNO (operands[1]);
20008 rtx ops[2];
20009 int i;
20010
20011 if (dest_start < src_start)
20012 {
20013 for (i = 0; i < 3; i++)
20014 {
20015 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20016 ops[1] = gen_rtx_REG (SImode, src_start + i);
20017 output_asm_insn ("mov%?\t%0, %1", ops);
20018 }
20019 }
20020 else
20021 {
20022 for (i = 2; i >= 0; i--)
20023 {
20024 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20025 ops[1] = gen_rtx_REG (SImode, src_start + i);
20026 output_asm_insn ("mov%?\t%0, %1", ops);
20027 }
20028 }
20029
20030 return "";
20031 }
20032
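/* Emit the instruction pair used to load a 32-bit value into DEST: for
   a constant, set the low halfword and then, if the high halfword is
   non-zero, insert it with a ZERO_EXTRACT (the movw/movt idiom); for a
   symbolic SRC, emit a HIGH/LO_SUM pair.  When two insns are needed, a
   REG_EQUAL note for the full value is attached to the last one.  */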
20033 void
20034 arm_emit_movpair (rtx dest, rtx src)
20035 {
20036 /* If the src is an immediate, simplify it. */
20037 if (CONST_INT_P (src))
20038 {
20039 HOST_WIDE_INT val = INTVAL (src);
20040 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20041 if ((val >> 16) & 0x0000ffff)
20042 {
20043 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20044 GEN_INT (16)),
20045 GEN_INT ((val >> 16) & 0x0000ffff));
20046 rtx_insn *insn = get_last_insn ();
20047 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20048 }
20049 return;
20050 }
20051 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20052 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20053 rtx_insn *insn = get_last_insn ();
20054 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20055 }
20056
20057 /* Output a move between double words. It must be REG<-MEM
20058 or MEM<-REG. */
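/* For example, a DImode load from a plain register address becomes

        ldrd    r0, [r2]

   when LDRD is usable (TARGET_LDRD and, in ARM state, an even first
   destination register), and falls back to

        ldmia   r2, {r0, r1}

   otherwise.  */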
20059 const char *
20060 output_move_double (rtx *operands, bool emit, int *count)
20061 {
20062 enum rtx_code code0 = GET_CODE (operands[0]);
20063 enum rtx_code code1 = GET_CODE (operands[1]);
20064 rtx otherops[3];
20065 if (count)
20066 *count = 1;
20067
20068 /* The only case when this might happen is when
20069 you are looking at the length of a DImode instruction
20070 that has an invalid constant in it. */
20071 if (code0 == REG && code1 != MEM)
20072 {
20073 gcc_assert (!emit);
20074 *count = 2;
20075 return "";
20076 }
20077
20078 if (code0 == REG)
20079 {
20080 unsigned int reg0 = REGNO (operands[0]);
20081 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20082
20083 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20084
20085 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20086
20087 switch (GET_CODE (XEXP (operands[1], 0)))
20088 {
20089 case REG:
20090
20091 if (emit)
20092 {
20093 if (can_ldrd
20094 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20095 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20096 else
20097 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20098 }
20099 break;
20100
20101 case PRE_INC:
20102 gcc_assert (can_ldrd);
20103 if (emit)
20104 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20105 break;
20106
20107 case PRE_DEC:
20108 if (emit)
20109 {
20110 if (can_ldrd)
20111 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20112 else
20113 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20114 }
20115 break;
20116
20117 case POST_INC:
20118 if (emit)
20119 {
20120 if (can_ldrd)
20121 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20122 else
20123 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20124 }
20125 break;
20126
20127 case POST_DEC:
20128 gcc_assert (can_ldrd);
20129 if (emit)
20130 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20131 break;
20132
20133 case PRE_MODIFY:
20134 case POST_MODIFY:
20135 /* Autoincrement addressing modes should never have overlapping
20136 base and destination registers, and overlapping index registers
20137 are already prohibited, so this doesn't need to worry about
20138 fix_cm3_ldrd. */
20139 otherops[0] = operands[0];
20140 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20141 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20142
20143 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20144 {
20145 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20146 {
20147 /* Registers overlap so split out the increment. */
20148 if (emit)
20149 {
20150 gcc_assert (can_ldrd);
20151 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20152 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20153 }
20154 if (count)
20155 *count = 2;
20156 }
20157 else
20158 {
20159 /* Use a single insn if we can.
20160 FIXME: IWMMXT allows offsets larger than ldrd can
20161 handle, fix these up with a pair of ldr. */
20162 if (can_ldrd
20163 && (TARGET_THUMB2
20164 || !CONST_INT_P (otherops[2])
20165 || (INTVAL (otherops[2]) > -256
20166 && INTVAL (otherops[2]) < 256)))
20167 {
20168 if (emit)
20169 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20170 }
20171 else
20172 {
20173 if (emit)
20174 {
20175 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20176 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20177 }
20178 if (count)
20179 *count = 2;
20180
20181 }
20182 }
20183 }
20184 else
20185 {
20186 /* Use a single insn if we can.
20187 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20188 fix these up with a pair of ldr. */
20189 if (can_ldrd
20190 && (TARGET_THUMB2
20191 || !CONST_INT_P (otherops[2])
20192 || (INTVAL (otherops[2]) > -256
20193 && INTVAL (otherops[2]) < 256)))
20194 {
20195 if (emit)
20196 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20197 }
20198 else
20199 {
20200 if (emit)
20201 {
20202 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20203 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20204 }
20205 if (count)
20206 *count = 2;
20207 }
20208 }
20209 break;
20210
20211 case LABEL_REF:
20212 case CONST:
20213 /* We might be able to use ldrd %0, %1 here. However the range is
20214 different to ldr/adr, and it is broken on some ARMv7-M
20215 implementations. */
20216 /* Use the second register of the pair to avoid problematic
20217 overlap. */
20218 otherops[1] = operands[1];
20219 if (emit)
20220 output_asm_insn ("adr%?\t%0, %1", otherops);
20221 operands[1] = otherops[0];
20222 if (emit)
20223 {
20224 if (can_ldrd)
20225 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20226 else
20227 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20228 }
20229
20230 if (count)
20231 *count = 2;
20232 break;
20233
20234 /* ??? This needs checking for thumb2. */
20235 default:
20236 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20237 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20238 {
20239 otherops[0] = operands[0];
20240 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20241 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20242
20243 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20244 {
20245 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20246 {
20247 switch ((int) INTVAL (otherops[2]))
20248 {
20249 case -8:
20250 if (emit)
20251 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20252 return "";
20253 case -4:
20254 if (TARGET_THUMB2)
20255 break;
20256 if (emit)
20257 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20258 return "";
20259 case 4:
20260 if (TARGET_THUMB2)
20261 break;
20262 if (emit)
20263 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20264 return "";
20265 }
20266 }
20267 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20268 operands[1] = otherops[0];
20269 if (can_ldrd
20270 && (REG_P (otherops[2])
20271 || TARGET_THUMB2
20272 || (CONST_INT_P (otherops[2])
20273 && INTVAL (otherops[2]) > -256
20274 && INTVAL (otherops[2]) < 256)))
20275 {
20276 if (reg_overlap_mentioned_p (operands[0],
20277 otherops[2]))
20278 {
20279 /* Swap base and index registers over to
20280 avoid a conflict. */
20281 std::swap (otherops[1], otherops[2]);
20282 }
20283 /* If both registers conflict, it will usually
20284 have been fixed by a splitter. */
20285 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20286 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20287 {
20288 if (emit)
20289 {
20290 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20291 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20292 }
20293 if (count)
20294 *count = 2;
20295 }
20296 else
20297 {
20298 otherops[0] = operands[0];
20299 if (emit)
20300 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20301 }
20302 return "";
20303 }
20304
20305 if (CONST_INT_P (otherops[2]))
20306 {
20307 if (emit)
20308 {
20309 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20310 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20311 else
20312 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20313 }
20314 }
20315 else
20316 {
20317 if (emit)
20318 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20319 }
20320 }
20321 else
20322 {
20323 if (emit)
20324 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20325 }
20326
20327 if (count)
20328 *count = 2;
20329
20330 if (can_ldrd)
20331 return "ldrd%?\t%0, [%1]";
20332
20333 return "ldmia%?\t%1, %M0";
20334 }
20335 else
20336 {
20337 otherops[1] = adjust_address (operands[1], SImode, 4);
20338 /* Take care of overlapping base/data reg. */
20339 if (reg_mentioned_p (operands[0], operands[1]))
20340 {
20341 if (emit)
20342 {
20343 output_asm_insn ("ldr%?\t%0, %1", otherops);
20344 output_asm_insn ("ldr%?\t%0, %1", operands);
20345 }
20346 if (count)
20347 *count = 2;
20348
20349 }
20350 else
20351 {
20352 if (emit)
20353 {
20354 output_asm_insn ("ldr%?\t%0, %1", operands);
20355 output_asm_insn ("ldr%?\t%0, %1", otherops);
20356 }
20357 if (count)
20358 *count = 2;
20359 }
20360 }
20361 }
20362 }
20363 else
20364 {
20365 /* Constraints should ensure this. */
20366 gcc_assert (code0 == MEM && code1 == REG);
20367 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20368 || (TARGET_ARM && TARGET_LDRD));
20369
20370 /* For TARGET_ARM the first source register of an STRD
20371 must be even. This is usually the case for double-word
20372 values but user assembly constraints can force an odd
20373 starting register. */
20374 bool allow_strd = TARGET_LDRD
20375 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20376 switch (GET_CODE (XEXP (operands[0], 0)))
20377 {
20378 case REG:
20379 if (emit)
20380 {
20381 if (allow_strd)
20382 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20383 else
20384 output_asm_insn ("stm%?\t%m0, %M1", operands);
20385 }
20386 break;
20387
20388 case PRE_INC:
20389 gcc_assert (allow_strd);
20390 if (emit)
20391 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20392 break;
20393
20394 case PRE_DEC:
20395 if (emit)
20396 {
20397 if (allow_strd)
20398 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20399 else
20400 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20401 }
20402 break;
20403
20404 case POST_INC:
20405 if (emit)
20406 {
20407 if (allow_strd)
20408 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20409 else
20410 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20411 }
20412 break;
20413
20414 case POST_DEC:
20415 gcc_assert (allow_strd);
20416 if (emit)
20417 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20418 break;
20419
20420 case PRE_MODIFY:
20421 case POST_MODIFY:
20422 otherops[0] = operands[1];
20423 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20424 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20425
20426 /* IWMMXT allows offsets larger than strd can handle,
20427 fix these up with a pair of str. */
20428 if (!TARGET_THUMB2
20429 && CONST_INT_P (otherops[2])
20430 && (INTVAL(otherops[2]) <= -256
20431 || INTVAL(otherops[2]) >= 256))
20432 {
20433 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20434 {
20435 if (emit)
20436 {
20437 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20438 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20439 }
20440 if (count)
20441 *count = 2;
20442 }
20443 else
20444 {
20445 if (emit)
20446 {
20447 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20448 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20449 }
20450 if (count)
20451 *count = 2;
20452 }
20453 }
20454 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20455 {
20456 if (emit)
20457 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20458 }
20459 else
20460 {
20461 if (emit)
20462 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20463 }
20464 break;
20465
20466 case PLUS:
20467 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20468 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20469 {
20470 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20471 {
20472 case -8:
20473 if (emit)
20474 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20475 return "";
20476
20477 case -4:
20478 if (TARGET_THUMB2)
20479 break;
20480 if (emit)
20481 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20482 return "";
20483
20484 case 4:
20485 if (TARGET_THUMB2)
20486 break;
20487 if (emit)
20488 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20489 return "";
20490 }
20491 }
20492 if (allow_strd
20493 && (REG_P (otherops[2])
20494 || TARGET_THUMB2
20495 || (CONST_INT_P (otherops[2])
20496 && INTVAL (otherops[2]) > -256
20497 && INTVAL (otherops[2]) < 256)))
20498 {
20499 otherops[0] = operands[1];
20500 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20501 if (emit)
20502 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20503 return "";
20504 }
20505 /* Fall through */
20506
20507 default:
20508 otherops[0] = adjust_address (operands[0], SImode, 4);
20509 otherops[1] = operands[1];
20510 if (emit)
20511 {
20512 output_asm_insn ("str%?\t%1, %0", operands);
20513 output_asm_insn ("str%?\t%H1, %0", otherops);
20514 }
20515 if (count)
20516 *count = 2;
20517 }
20518 }
20519
20520 return "";
20521 }
20522
20523 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20524 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
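/* For example, a quad-word load through a register address is emitted
   as

        ldmia   r2, {r4-r7}

   and the corresponding store as

        stm     r2, {r4-r7}  */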
20525
20526 const char *
20527 output_move_quad (rtx *operands)
20528 {
20529 if (REG_P (operands[0]))
20530 {
20531 /* Load, or reg->reg move. */
20532
20533 if (MEM_P (operands[1]))
20534 {
20535 switch (GET_CODE (XEXP (operands[1], 0)))
20536 {
20537 case REG:
20538 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20539 break;
20540
20541 case LABEL_REF:
20542 case CONST:
20543 output_asm_insn ("adr%?\t%0, %1", operands);
20544 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20545 break;
20546
20547 default:
20548 gcc_unreachable ();
20549 }
20550 }
20551 else
20552 {
20553 rtx ops[2];
20554 int dest, src, i;
20555
20556 gcc_assert (REG_P (operands[1]));
20557
20558 dest = REGNO (operands[0]);
20559 src = REGNO (operands[1]);
20560
20561 /* This seems pretty dumb, but hopefully GCC won't try to do it
20562 very often. */
20563 if (dest < src)
20564 for (i = 0; i < 4; i++)
20565 {
20566 ops[0] = gen_rtx_REG (SImode, dest + i);
20567 ops[1] = gen_rtx_REG (SImode, src + i);
20568 output_asm_insn ("mov%?\t%0, %1", ops);
20569 }
20570 else
20571 for (i = 3; i >= 0; i--)
20572 {
20573 ops[0] = gen_rtx_REG (SImode, dest + i);
20574 ops[1] = gen_rtx_REG (SImode, src + i);
20575 output_asm_insn ("mov%?\t%0, %1", ops);
20576 }
20577 }
20578 }
20579 else
20580 {
20581 gcc_assert (MEM_P (operands[0]));
20582 gcc_assert (REG_P (operands[1]));
20583 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20584
20585 switch (GET_CODE (XEXP (operands[0], 0)))
20586 {
20587 case REG:
20588 output_asm_insn ("stm%?\t%m0, %M1", operands);
20589 break;
20590
20591 default:
20592 gcc_unreachable ();
20593 }
20594 }
20595
20596 return "";
20597 }
20598
20599 /* Output a VFP load or store instruction. */
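/* For example, a double-precision load from a simple address is printed
   as

        vldr.64 d1, [r0]

   with the ".64"/".32"/".16" size suffix taken from the operand mode;
   pre-decrement and post-increment addresses use the equivalent
   vstmdb / vldmia forms with a single register in the list.  */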
20600
20601 const char *
20602 output_move_vfp (rtx *operands)
20603 {
20604 rtx reg, mem, addr, ops[2];
20605 int load = REG_P (operands[0]);
20606 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20607 int sp = (!TARGET_VFP_FP16INST
20608 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20609 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20610 const char *templ;
20611 char buff[50];
20612 machine_mode mode;
20613
20614 reg = operands[!load];
20615 mem = operands[load];
20616
20617 mode = GET_MODE (reg);
20618
20619 gcc_assert (REG_P (reg));
20620 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20621 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20622 || mode == SFmode
20623 || mode == DFmode
20624 || mode == HImode
20625 || mode == SImode
20626 || mode == DImode
20627 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20628 gcc_assert (MEM_P (mem));
20629
20630 addr = XEXP (mem, 0);
20631
20632 switch (GET_CODE (addr))
20633 {
20634 case PRE_DEC:
20635 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20636 ops[0] = XEXP (addr, 0);
20637 ops[1] = reg;
20638 break;
20639
20640 case POST_INC:
20641 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20642 ops[0] = XEXP (addr, 0);
20643 ops[1] = reg;
20644 break;
20645
20646 default:
20647 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20648 ops[0] = reg;
20649 ops[1] = mem;
20650 break;
20651 }
20652
20653 sprintf (buff, templ,
20654 load ? "ld" : "st",
20655 dp ? "64" : sp ? "32" : "16",
20656 dp ? "P" : "",
20657 integer_p ? "\t%@ int" : "");
20658 output_asm_insn (buff, ops);
20659
20660 return "";
20661 }
20662
20663 /* Output a Neon double-word or quad-word load or store, or a load
20664 or store for larger structure modes.
20665
20666 WARNING: The ordering of elements is weird in big-endian mode,
20667 because the EABI requires that vectors stored in memory appear
20668 as though they were stored by a VSTM instruction.
20669 GCC RTL defines element ordering based on in-memory order.
20670 This can be different from the architectural ordering of elements
20671 within a NEON register. The intrinsics defined in arm_neon.h use the
20672 NEON register element ordering, not the GCC RTL element ordering.
20673
20674 For example, the in-memory ordering of a big-endian quadword
20675 vector with 16-bit elements when stored from register pair {d0,d1}
20676 will be (lowest address first, d0[N] is NEON register element N):
20677
20678 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20679
20680 When necessary, quadword registers (dN, dN+1) are moved to ARM
20681 registers from rN in the order:
20682
20683 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20684
20685 So that STM/LDM can be used on vectors in ARM registers, and the
20686 same memory layout will result as if VSTM/VLDM were used.
20687
20688 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20689 possible, which allows use of appropriate alignment tags.
20690 Note that the choice of "64" is independent of the actual vector
20691 element size; this size simply ensures that the behavior is
20692 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20693
20694 Due to limitations of those instructions, use of VST1.64/VLD1.64
20695 is not possible if:
20696 - the address contains PRE_DEC, or
20697 - the mode refers to more than 4 double-word registers
20698
20699 In those cases, it would be possible to replace VSTM/VLDM by a
20700 sequence of instructions; this is not currently implemented since
20701 this is not certain to actually improve performance. */
20702
20703 const char *
20704 output_move_neon (rtx *operands)
20705 {
20706 rtx reg, mem, addr, ops[2];
20707 int regno, nregs, load = REG_P (operands[0]);
20708 const char *templ;
20709 char buff[50];
20710 machine_mode mode;
20711
20712 reg = operands[!load];
20713 mem = operands[load];
20714
20715 mode = GET_MODE (reg);
20716
20717 gcc_assert (REG_P (reg));
20718 regno = REGNO (reg);
20719 nregs = REG_NREGS (reg) / 2;
20720 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20721 || NEON_REGNO_OK_FOR_QUAD (regno));
20722 gcc_assert (VALID_NEON_DREG_MODE (mode)
20723 || VALID_NEON_QREG_MODE (mode)
20724 || VALID_NEON_STRUCT_MODE (mode));
20725 gcc_assert (MEM_P (mem));
20726
20727 addr = XEXP (mem, 0);
20728
20729 /* Strip off const from addresses like (const (plus (...))). */
20730 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20731 addr = XEXP (addr, 0);
20732
20733 switch (GET_CODE (addr))
20734 {
20735 case POST_INC:
20736 /* We have to use vldm / vstm for too-large modes. */
20737 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20738 {
20739 templ = "v%smia%%?\t%%0!, %%h1";
20740 ops[0] = XEXP (addr, 0);
20741 }
20742 else
20743 {
20744 templ = "v%s1.64\t%%h1, %%A0";
20745 ops[0] = mem;
20746 }
20747 ops[1] = reg;
20748 break;
20749
20750 case PRE_DEC:
20751 /* We have to use vldm / vstm in this case, since there is no
20752 pre-decrement form of the vld1 / vst1 instructions. */
20753 templ = "v%smdb%%?\t%%0!, %%h1";
20754 ops[0] = XEXP (addr, 0);
20755 ops[1] = reg;
20756 break;
20757
20758 case POST_MODIFY:
20759 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20760 gcc_unreachable ();
20761
20762 case REG:
20763 /* We have to use vldm / vstm for too-large modes. */
20764 if (nregs > 1)
20765 {
20766 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20767 templ = "v%smia%%?\t%%m0, %%h1";
20768 else
20769 templ = "v%s1.64\t%%h1, %%A0";
20770
20771 ops[0] = mem;
20772 ops[1] = reg;
20773 break;
20774 }
20775 /* Fall through. */
20776 case PLUS:
20777 if (GET_CODE (addr) == PLUS)
20778 addr = XEXP (addr, 0);
20779 /* Fall through. */
20780 case LABEL_REF:
20781 {
20782 int i;
20783 int overlap = -1;
20784 for (i = 0; i < nregs; i++)
20785 {
20786 /* We're only using DImode here because it's a convenient
20787 size. */
20788 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20789 ops[1] = adjust_address (mem, DImode, 8 * i);
20790 if (reg_overlap_mentioned_p (ops[0], mem))
20791 {
20792 gcc_assert (overlap == -1);
20793 overlap = i;
20794 }
20795 else
20796 {
20797 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20798 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20799 else
20800 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20801 output_asm_insn (buff, ops);
20802 }
20803 }
20804 if (overlap != -1)
20805 {
20806 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20807 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20808 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20809 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20810 else
20811 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20812 output_asm_insn (buff, ops);
20813 }
20814
20815 return "";
20816 }
20817
20818 default:
20819 gcc_unreachable ();
20820 }
20821
20822 sprintf (buff, templ, load ? "ld" : "st");
20823 output_asm_insn (buff, ops);
20824
20825 return "";
20826 }
20827
20828 /* Compute and return the length of neon_mov<mode>, where <mode> is
20829 one of VSTRUCT modes: EI, OI, CI or XI. */
20830 int
20831 arm_attr_length_move_neon (rtx_insn *insn)
20832 {
20833 rtx reg, mem, addr;
20834 int load;
20835 machine_mode mode;
20836
20837 extract_insn_cached (insn);
20838
20839 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20840 {
20841 mode = GET_MODE (recog_data.operand[0]);
20842 switch (mode)
20843 {
20844 case E_EImode:
20845 case E_OImode:
20846 return 8;
20847 case E_CImode:
20848 return 12;
20849 case E_XImode:
20850 return 16;
20851 default:
20852 gcc_unreachable ();
20853 }
20854 }
20855
20856 load = REG_P (recog_data.operand[0]);
20857 reg = recog_data.operand[!load];
20858 mem = recog_data.operand[load];
20859
20860 gcc_assert (MEM_P (mem));
20861
20862 addr = XEXP (mem, 0);
20863
20864 /* Strip off const from addresses like (const (plus (...))). */
20865 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20866 addr = XEXP (addr, 0);
20867
20868 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20869 {
20870 int insns = REG_NREGS (reg) / 2;
20871 return insns * 4;
20872 }
20873 else
20874 return 4;
20875 }
20876
20877 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20878 return zero. */
20879
20880 int
20881 arm_address_offset_is_imm (rtx_insn *insn)
20882 {
20883 rtx mem, addr;
20884
20885 extract_insn_cached (insn);
20886
20887 if (REG_P (recog_data.operand[0]))
20888 return 0;
20889
20890 mem = recog_data.operand[0];
20891
20892 gcc_assert (MEM_P (mem));
20893
20894 addr = XEXP (mem, 0);
20895
20896 if (REG_P (addr)
20897 || (GET_CODE (addr) == PLUS
20898 && REG_P (XEXP (addr, 0))
20899 && CONST_INT_P (XEXP (addr, 1))))
20900 return 1;
20901 else
20902 return 0;
20903 }
20904
20905 /* Output an ADD r, s, #n where n may be too big for one instruction.
20906 If n is zero and r and s are the same register, output nothing. */
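/* For example, n = 0x10004 is not a valid ARM immediate, so it is split
   into two immediates that are, giving

        add     r0, r1, #4
        add     r0, r0, #65536  */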
20907 const char *
20908 output_add_immediate (rtx *operands)
20909 {
20910 HOST_WIDE_INT n = INTVAL (operands[2]);
20911
20912 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20913 {
20914 if (n < 0)
20915 output_multi_immediate (operands,
20916 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20917 -n);
20918 else
20919 output_multi_immediate (operands,
20920 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20921 n);
20922 }
20923
20924 return "";
20925 }
20926
20927 /* Output a multiple immediate operation.
20928 OPERANDS is the vector of operands referred to in the output patterns.
20929 INSTR1 is the output pattern to use for the first constant.
20930 INSTR2 is the output pattern to use for subsequent constants.
20931 IMMED_OP is the index of the constant slot in OPERANDS.
20932 N is the constant value. */
20933 static const char *
20934 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20935 int immed_op, HOST_WIDE_INT n)
20936 {
20937 #if HOST_BITS_PER_WIDE_INT > 32
20938 n &= 0xffffffff;
20939 #endif
20940
20941 if (n == 0)
20942 {
20943 /* Quick and easy output. */
20944 operands[immed_op] = const0_rtx;
20945 output_asm_insn (instr1, operands);
20946 }
20947 else
20948 {
20949 int i;
20950 const char * instr = instr1;
20951
20952 /* Note that n is never zero here (which would give no output). */
20953 for (i = 0; i < 32; i += 2)
20954 {
20955 if (n & (3 << i))
20956 {
20957 operands[immed_op] = GEN_INT (n & (255 << i));
20958 output_asm_insn (instr, operands);
20959 instr = instr2;
20960 i += 6;
20961 }
20962 }
20963 }
20964
20965 return "";
20966 }
20967
20968 /* Return the name of a shifter operation. */
20969 static const char *
20970 arm_shift_nmem(enum rtx_code code)
20971 {
20972 switch (code)
20973 {
20974 case ASHIFT:
20975 return ARM_LSL_NAME;
20976
20977 case ASHIFTRT:
20978 return "asr";
20979
20980 case LSHIFTRT:
20981 return "lsr";
20982
20983 case ROTATERT:
20984 return "ror";
20985
20986 default:
20987 abort();
20988 }
20989 }
20990
20991 /* Return the appropriate ARM instruction for the operation code.
20992 The returned result should not be overwritten. OP is the rtx of the
20993 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20994 was shifted. */
20995 const char *
20996 arithmetic_instr (rtx op, int shift_first_arg)
20997 {
20998 switch (GET_CODE (op))
20999 {
21000 case PLUS:
21001 return "add";
21002
21003 case MINUS:
21004 return shift_first_arg ? "rsb" : "sub";
21005
21006 case IOR:
21007 return "orr";
21008
21009 case XOR:
21010 return "eor";
21011
21012 case AND:
21013 return "and";
21014
21015 case ASHIFT:
21016 case ASHIFTRT:
21017 case LSHIFTRT:
21018 case ROTATERT:
21019 return arm_shift_nmem(GET_CODE(op));
21020
21021 default:
21022 gcc_unreachable ();
21023 }
21024 }
21025
21026 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21027 for the operation code. The returned result should not be overwritten.
21028 OP is the rtx of the shift.
21029 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
21030 will hold the constant shift amount. */
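/* For example, (mult (reg) (const_int 8)) -- the canonical form of a
   left shift by 3 when it appears inside another operation -- returns
   the LSL mnemonic with *AMOUNTP set to 3, while (ashiftrt (reg) (reg))
   returns "asr" with *AMOUNTP set to -1.  */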
21031 static const char *
21032 shift_op (rtx op, HOST_WIDE_INT *amountp)
21033 {
21034 const char * mnem;
21035 enum rtx_code code = GET_CODE (op);
21036
21037 switch (code)
21038 {
21039 case ROTATE:
21040 if (!CONST_INT_P (XEXP (op, 1)))
21041 {
21042 output_operand_lossage ("invalid shift operand");
21043 return NULL;
21044 }
21045
21046 code = ROTATERT;
21047 *amountp = 32 - INTVAL (XEXP (op, 1));
21048 mnem = "ror";
21049 break;
21050
21051 case ASHIFT:
21052 case ASHIFTRT:
21053 case LSHIFTRT:
21054 case ROTATERT:
21055 mnem = arm_shift_nmem(code);
21056 if (CONST_INT_P (XEXP (op, 1)))
21057 {
21058 *amountp = INTVAL (XEXP (op, 1));
21059 }
21060 else if (REG_P (XEXP (op, 1)))
21061 {
21062 *amountp = -1;
21063 return mnem;
21064 }
21065 else
21066 {
21067 output_operand_lossage ("invalid shift operand");
21068 return NULL;
21069 }
21070 break;
21071
21072 case MULT:
21073 /* We never have to worry about the amount being other than a
21074 power of 2, since this case can never be reloaded from a reg. */
21075 if (!CONST_INT_P (XEXP (op, 1)))
21076 {
21077 output_operand_lossage ("invalid shift operand");
21078 return NULL;
21079 }
21080
21081 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21082
21083 /* Amount must be a power of two. */
21084 if (*amountp & (*amountp - 1))
21085 {
21086 output_operand_lossage ("invalid shift operand");
21087 return NULL;
21088 }
21089
21090 *amountp = exact_log2 (*amountp);
21091 gcc_assert (IN_RANGE (*amountp, 0, 31));
21092 return ARM_LSL_NAME;
21093
21094 default:
21095 output_operand_lossage ("invalid shift operand");
21096 return NULL;
21097 }
21098
21099 /* This is not 100% correct, but follows from the desire to merge
21100 multiplication by a power of 2 with the recognizer for a
21101 shift. >=32 is not a valid shift for "lsl", so we must try to
21102 output a shift that produces the correct arithmetical result.
21103 Using lsr #32 is identical except for the fact that the carry bit
21104 is not set correctly if we set the flags; but we never use the
21105 carry bit from such an operation, so we can ignore that. */
21106 if (code == ROTATERT)
21107 /* Rotate is just modulo 32. */
21108 *amountp &= 31;
21109 else if (*amountp != (*amountp & 31))
21110 {
21111 if (code == ASHIFT)
21112 mnem = "lsr";
21113 *amountp = 32;
21114 }
21115
21116 /* Shifts of 0 are no-ops. */
21117 if (*amountp == 0)
21118 return NULL;
21119
21120 return mnem;
21121 }
21122
21123 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21124 because /bin/as is horribly restrictive. The judgement about
21125 whether or not each character is 'printable' (and can be output as
21126 is) or not (and must be printed with an octal escape) must be made
21127 with reference to the *host* character set -- the situation is
21128 similar to that discussed in the comments above pp_c_char in
21129 c-pretty-print.cc. */
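/* For example, the three input bytes 'a', '"' and '\n' are emitted as

        .ascii  "a\"\012"

   with the quote backslash-escaped and the non-printable newline
   written as an octal escape.  */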
21130
21131 #define MAX_ASCII_LEN 51
21132
21133 void
21134 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21135 {
21136 int i;
21137 int len_so_far = 0;
21138
21139 fputs ("\t.ascii\t\"", stream);
21140
21141 for (i = 0; i < len; i++)
21142 {
21143 int c = p[i];
21144
21145 if (len_so_far >= MAX_ASCII_LEN)
21146 {
21147 fputs ("\"\n\t.ascii\t\"", stream);
21148 len_so_far = 0;
21149 }
21150
21151 if (ISPRINT (c))
21152 {
21153 if (c == '\\' || c == '\"')
21154 {
21155 putc ('\\', stream);
21156 len_so_far++;
21157 }
21158 putc (c, stream);
21159 len_so_far++;
21160 }
21161 else
21162 {
21163 fprintf (stream, "\\%03o", c);
21164 len_so_far += 4;
21165 }
21166 }
21167
21168 fputs ("\"\n", stream);
21169 }
21170 \f
21171
21172 /* Compute the register save mask for registers 0 through 12
21173 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21174
21175 static unsigned long
21176 arm_compute_save_reg0_reg12_mask (void)
21177 {
21178 unsigned long func_type = arm_current_func_type ();
21179 unsigned long save_reg_mask = 0;
21180 unsigned int reg;
21181
21182 if (IS_INTERRUPT (func_type))
21183 {
21184 unsigned int max_reg;
21185 /* Interrupt functions must not corrupt any registers,
21186 even call clobbered ones. If this is a leaf function
21187 we can just examine the registers used by the RTL, but
21188 otherwise we have to assume that whatever function is
21189 called might clobber anything, and so we have to save
21190 all the call-clobbered registers as well. */
21191 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21192 /* FIQ handlers have registers r8 - r12 banked, so
21193 we only need to check r0 - r7; normal ISRs only
21194 bank r14 and r15, so we must check up to r12.
21195 r13 is the stack pointer which is always preserved,
21196 so we do not need to consider it here. */
21197 max_reg = 7;
21198 else
21199 max_reg = 12;
21200
21201 for (reg = 0; reg <= max_reg; reg++)
21202 if (reg_needs_saving_p (reg))
21203 save_reg_mask |= (1 << reg);
21204
21205 /* Also save the pic base register if necessary. */
21206 if (PIC_REGISTER_MAY_NEED_SAVING
21207 && crtl->uses_pic_offset_table)
21208 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21209 }
21210 else if (IS_VOLATILE(func_type))
21211 {
21212 /* For noreturn functions we historically omitted register saves
21213 altogether. However this really messes up debugging. As a
21214 compromise save just the frame pointers. Combined with the link
21215 register saved elsewhere this should be sufficient to get
21216 a backtrace. */
21217 if (frame_pointer_needed)
21218 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21219 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21220 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21221 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21222 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21223 }
21224 else
21225 {
21226 /* In the normal case we only need to save those registers
21227 which are call saved and which are used by this function. */
21228 for (reg = 0; reg <= 11; reg++)
21229 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21230 save_reg_mask |= (1 << reg);
21231
21232 /* Handle the frame pointer as a special case. */
21233 if (frame_pointer_needed)
21234 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21235
21236 /* If we aren't loading the PIC register,
21237 don't stack it even though it may be live. */
21238 if (PIC_REGISTER_MAY_NEED_SAVING
21239 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21240 || crtl->uses_pic_offset_table))
21241 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21242
21243 /* The prologue will copy SP into R0, so save it. */
21244 if (IS_STACKALIGN (func_type))
21245 save_reg_mask |= 1;
21246 }
21247
21248 /* Save registers so the exception handler can modify them. */
21249 if (crtl->calls_eh_return)
21250 {
21251 unsigned int i;
21252
21253 for (i = 0; ; i++)
21254 {
21255 reg = EH_RETURN_DATA_REGNO (i);
21256 if (reg == INVALID_REGNUM)
21257 break;
21258 save_reg_mask |= 1 << reg;
21259 }
21260 }
21261
21262 return save_reg_mask;
21263 }
21264
21265 /* Return true if r3 is live at the start of the function. */
21266
21267 static bool
21268 arm_r3_live_at_start_p (void)
21269 {
21270 /* Just look at cfg info, which is still close enough to correct at this
21271 point. This gives false positives for broken functions that might use
21272 uninitialized data that happens to be allocated in r3, but who cares? */
21273 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21274 }
21275
21276 /* Compute the number of bytes used to store the static chain register on the
21277 stack, above the stack frame. We need to know this accurately to get the
21278 alignment of the rest of the stack frame correct. */
21279
21280 static int
21281 arm_compute_static_chain_stack_bytes (void)
21282 {
21283 /* Once the value is updated from the init value of -1, do not
21284 re-compute. */
21285 if (cfun->machine->static_chain_stack_bytes != -1)
21286 return cfun->machine->static_chain_stack_bytes;
21287
21288 /* See the defining assertion in arm_expand_prologue. */
21289 if (IS_NESTED (arm_current_func_type ())
21290 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21291 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21292 || flag_stack_clash_protection)
21293 && !df_regs_ever_live_p (LR_REGNUM)))
21294 && arm_r3_live_at_start_p ()
21295 && crtl->args.pretend_args_size == 0)
21296 return 4;
21297
21298 return 0;
21299 }
21300
21301 /* Compute a bit mask of which core registers need to be
21302 saved on the stack for the current function.
21303 This is used by arm_compute_frame_layout, which may add extra registers. */
21304
21305 static unsigned long
21306 arm_compute_save_core_reg_mask (void)
21307 {
21308 unsigned int save_reg_mask = 0;
21309 unsigned long func_type = arm_current_func_type ();
21310 unsigned int reg;
21311
21312 if (IS_NAKED (func_type))
21313 /* This should never really happen. */
21314 return 0;
21315
21316 /* If we are creating a stack frame, then we must save the frame pointer,
21317 IP (which will hold the old stack pointer), LR and the PC. */
21318 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21319 save_reg_mask |=
21320 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21321 | (1 << IP_REGNUM)
21322 | (1 << LR_REGNUM)
21323 | (1 << PC_REGNUM);
21324
21325 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21326
21327 if (arm_current_function_pac_enabled_p ())
21328 save_reg_mask |= 1 << IP_REGNUM;
21329
21330 /* Decide if we need to save the link register.
21331 Interrupt routines have their own banked link register,
21332 so they never need to save it.
21333 Otherwise if we do not use the link register we do not need to save
21334 it. If we are pushing other registers onto the stack however, we
21335 can save an instruction in the epilogue by pushing the link register
21336 now and then popping it back into the PC. This incurs extra memory
21337 accesses though, so we only do it when optimizing for size, and only
21338 if we know that we will not need a fancy return sequence. */
21339 if (df_regs_ever_live_p (LR_REGNUM)
21340 || (save_reg_mask
21341 && optimize_size
21342 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21343 && !crtl->tail_call_emit
21344 && !crtl->calls_eh_return))
21345 save_reg_mask |= 1 << LR_REGNUM;
21346
21347 if (cfun->machine->lr_save_eliminated)
21348 save_reg_mask &= ~ (1 << LR_REGNUM);
21349
21350 if (TARGET_REALLY_IWMMXT
21351 && ((bit_count (save_reg_mask)
21352 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21353 arm_compute_static_chain_stack_bytes())
21354 ) % 2) != 0)
21355 {
21356 /* The total number of registers that are going to be pushed
21357 onto the stack is odd. We need to ensure that the stack
21358 is 64-bit aligned before we start to save iWMMXt registers,
21359 and also before we start to create locals. (A local variable
21360 might be a double or long long which we will load/store using
21361 an iWMMXt instruction). Therefore we need to push another
21362 ARM register, so that the stack will be 64-bit aligned. We
21363 try to avoid using the arg registers (r0 - r3) as they might be
21364 used to pass values in a tail call. */
21365 for (reg = 4; reg <= 12; reg++)
21366 if ((save_reg_mask & (1 << reg)) == 0)
21367 break;
21368
21369 if (reg <= 12)
21370 save_reg_mask |= (1 << reg);
21371 else
21372 {
21373 cfun->machine->sibcall_blocked = 1;
21374 save_reg_mask |= (1 << 3);
21375 }
21376 }
21377
21378 /* We may need to push an additional register for use initializing the
21379 PIC base register. */
21380 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21381 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21382 {
21383 reg = thumb_find_work_register (1 << 4);
21384 if (!call_used_or_fixed_reg_p (reg))
21385 save_reg_mask |= (1 << reg);
21386 }
21387
21388 return save_reg_mask;
21389 }
21390
21391 /* Compute a bit mask of which core registers need to be
21392 saved on the stack for the current function. */
21393 static unsigned long
21394 thumb1_compute_save_core_reg_mask (void)
21395 {
21396 unsigned long mask;
21397 unsigned reg;
21398
21399 mask = 0;
21400 for (reg = 0; reg < 12; reg ++)
21401 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21402 mask |= 1 << reg;
21403
21404 /* Handle the frame pointer as a special case. */
21405 if (frame_pointer_needed)
21406 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21407
21408 if (flag_pic
21409 && !TARGET_SINGLE_PIC_BASE
21410 && arm_pic_register != INVALID_REGNUM
21411 && crtl->uses_pic_offset_table)
21412 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21413
21414 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21415 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21416 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21417
21418 /* LR will also be pushed if any lo regs are pushed. */
21419 if (mask & 0xff || thumb_force_lr_save ())
21420 mask |= (1 << LR_REGNUM);
21421
21422 bool call_clobbered_scratch
21423 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21424 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21425
21426 /* Make sure we have a low work register if we need one. We will
21427 need one if we are going to push a high register, but we are not
21428 currently intending to push a low register. However if both the
21429 prologue and epilogue have a spare call-clobbered low register,
21430 then we won't need to find an additional work register. It does
21431 not need to be the same register in the prologue and
21432 epilogue. */
21433 if ((mask & 0xff) == 0
21434 && !call_clobbered_scratch
21435 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21436 {
21437 /* Use thumb_find_work_register to choose which register
21438 we will use. If the register is live then we will
21439 have to push it. Use LAST_LO_REGNUM as our fallback
21440 choice for the register to select. */
21441 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21442 /* Make sure the register returned by thumb_find_work_register is
21443 not part of the return value. */
21444 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21445 reg = LAST_LO_REGNUM;
21446
21447 if (callee_saved_reg_p (reg))
21448 mask |= 1 << reg;
21449 }
21450
21451 /* The 504 below is 8 bytes less than 512 because there are two possible
21452 alignment words. We can't tell here if they will be present or not so we
21453 have to play it safe and assume that they are. */
21454 if ((CALLER_INTERWORKING_SLOT_SIZE +
21455 ROUND_UP_WORD (get_frame_size ()) +
21456 crtl->outgoing_args_size) >= 504)
21457 {
21458 /* This is the same as the code in thumb1_expand_prologue() which
21459 determines which register to use for stack decrement. */
21460 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21461 if (mask & (1 << reg))
21462 break;
21463
21464 if (reg > LAST_LO_REGNUM)
21465 {
21466 /* Make sure we have a register available for stack decrement. */
21467 mask |= 1 << LAST_LO_REGNUM;
21468 }
21469 }
21470
21471 return mask;
21472 }
21473
21474 /* Return the number of bytes required to save VFP registers. */
21475 static int
21476 arm_get_vfp_saved_size (void)
21477 {
21478 unsigned int regno;
21479 int count;
21480 int saved;
21481
21482 saved = 0;
21483 /* Space for saved VFP registers. */
21484 if (TARGET_VFP_BASE)
21485 {
21486 count = 0;
21487 for (regno = FIRST_VFP_REGNUM;
21488 regno < LAST_VFP_REGNUM;
21489 regno += 2)
21490 {
21491 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21492 {
21493 if (count > 0)
21494 {
21495 /* Workaround ARM10 VFPr1 bug. */
21496 if (count == 2 && !arm_arch6)
21497 count++;
21498 saved += count * 8;
21499 }
21500 count = 0;
21501 }
21502 else
21503 count++;
21504 }
21505 if (count > 0)
21506 {
21507 if (count == 2 && !arm_arch6)
21508 count++;
21509 saved += count * 8;
21510 }
21511 }
21512 return saved;
21513 }
21514
21515
21516 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21517 everything bar the final return instruction. If simple_return is true,
21518 then do not output the epilogue, because it has already been emitted in RTL.
21519
21520 Note: do not forget to update the length attribute of the corresponding insn
21521 pattern when changing assembly output (e.g. the length attribute of
21522 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21523 register clearing sequences). */
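/* For a rough illustration (register choice is hypothetical): a plain
   ARM-mode function that saved {r4, r5, lr} typically gets

	pop	{r4, r5, pc}

   while a function that saved nothing returns with "bx lr" (or
   "mov pc, lr" on cores without BX).  */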
21524 const char *
21525 output_return_instruction (rtx operand, bool really_return, bool reverse,
21526 bool simple_return)
21527 {
21528 char conditional[10];
21529 char instr[100];
21530 unsigned reg;
21531 unsigned long live_regs_mask;
21532 unsigned long func_type;
21533 arm_stack_offsets *offsets;
21534
21535 func_type = arm_current_func_type ();
21536
21537 if (IS_NAKED (func_type))
21538 return "";
21539
21540 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21541 {
21542 /* If this function was declared non-returning, and we have
21543 found a tail call, then we have to trust that the called
21544 function won't return. */
21545 if (really_return)
21546 {
21547 rtx ops[2];
21548
21549 /* Otherwise, trap an attempted return by aborting. */
21550 ops[0] = operand;
21551 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21552 : "abort");
21553 assemble_external_libcall (ops[1]);
21554 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21555 }
21556
21557 return "";
21558 }
21559
21560 gcc_assert (!cfun->calls_alloca || really_return);
21561
21562 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21563
21564 cfun->machine->return_used_this_function = 1;
21565
21566 offsets = arm_get_frame_offsets ();
21567 live_regs_mask = offsets->saved_regs_mask;
21568
21569 if (!simple_return && live_regs_mask)
21570 {
21571 const char * return_reg;
21572
21573 /* If we do not have any special requirements for function exit
21574 (e.g. interworking) then we can load the return address
21575 directly into the PC. Otherwise we must load it into LR. */
21576 if (really_return
21577 && !IS_CMSE_ENTRY (func_type)
21578 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21579 return_reg = reg_names[PC_REGNUM];
21580 else
21581 return_reg = reg_names[LR_REGNUM];
21582
21583 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21584 {
21585 /* There are three possible reasons for the IP register
21586 being saved: 1) a stack frame was created, in which case
21587 IP contains the old stack pointer, or 2) an ISR routine
21588 corrupted it, or 3) it was saved to align the stack on
21589 iWMMXt. In case 1, restore IP into SP, otherwise just
21590 restore IP. */
21591 if (frame_pointer_needed)
21592 {
21593 live_regs_mask &= ~ (1 << IP_REGNUM);
21594 live_regs_mask |= (1 << SP_REGNUM);
21595 }
21596 else
21597 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21598 }
21599
21600 /* On some ARM architectures it is faster to use LDR rather than
21601 LDM to load a single register. On other architectures, the
21602 cost is the same. In 26 bit mode, or for exception handlers,
21603 we have to use LDM to load the PC so that the CPSR is also
21604 restored. */
21605 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21606 if (live_regs_mask == (1U << reg))
21607 break;
21608
21609 if (reg <= LAST_ARM_REGNUM
21610 && (reg != LR_REGNUM
21611 || ! really_return
21612 || ! IS_INTERRUPT (func_type)))
21613 {
21614 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21615 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21616 }
21617 else
21618 {
21619 char *p;
21620 int first = 1;
21621
21622 /* Generate the load multiple instruction to restore the
21623 registers. Note we can get here, even if
21624 frame_pointer_needed is true, but only if sp already
21625 points to the base of the saved core registers. */
21626 if (live_regs_mask & (1 << SP_REGNUM))
21627 {
21628 unsigned HOST_WIDE_INT stack_adjust;
21629
21630 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21631 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21632
21633 if (stack_adjust && arm_arch5t && TARGET_ARM)
21634 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21635 else
21636 {
21637 /* If we can't use ldmib (SA110 bug),
21638 then try to pop r3 instead. */
21639 if (stack_adjust)
21640 live_regs_mask |= 1 << 3;
21641
21642 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21643 }
21644 }
21645 /* For interrupt returns we have to use an LDM rather than
21646 a POP so that we can use the exception return variant. */
21647 else if (IS_INTERRUPT (func_type))
21648 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21649 else
21650 sprintf (instr, "pop%s\t{", conditional);
21651
21652 p = instr + strlen (instr);
21653
21654 for (reg = 0; reg <= SP_REGNUM; reg++)
21655 if (live_regs_mask & (1 << reg))
21656 {
21657 int l = strlen (reg_names[reg]);
21658
21659 if (first)
21660 first = 0;
21661 else
21662 {
21663 memcpy (p, ", ", 2);
21664 p += 2;
21665 }
21666
21667 memcpy (p, "%|", 2);
21668 memcpy (p + 2, reg_names[reg], l);
21669 p += l + 2;
21670 }
21671
21672 if (live_regs_mask & (1 << LR_REGNUM))
21673 {
21674 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21675 /* If returning from an interrupt, restore the CPSR. */
21676 if (IS_INTERRUPT (func_type))
21677 strcat (p, "^");
21678 }
21679 else
21680 strcpy (p, "}");
21681 }
21682
21683 output_asm_insn (instr, & operand);
21684
21685 /* See if we need to generate an extra instruction to
21686 perform the actual function return. */
21687 if (really_return
21688 && func_type != ARM_FT_INTERWORKED
21689 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21690 {
21691 /* The return has already been handled
21692 by loading the LR into the PC. */
21693 return "";
21694 }
21695 }
21696
21697 if (really_return)
21698 {
21699 switch ((int) ARM_FUNC_TYPE (func_type))
21700 {
21701 case ARM_FT_ISR:
21702 case ARM_FT_FIQ:
21703 /* ??? This is wrong for unified assembly syntax. */
21704 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21705 break;
21706
21707 case ARM_FT_INTERWORKED:
21708 gcc_assert (arm_arch5t || arm_arch4t);
21709 sprintf (instr, "bx%s\t%%|lr", conditional);
21710 break;
21711
21712 case ARM_FT_EXCEPTION:
21713 /* ??? This is wrong for unified assembly syntax. */
21714 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21715 break;
21716
21717 default:
21718 if (IS_CMSE_ENTRY (func_type))
21719 {
21720 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21721 emitted by cmse_nonsecure_entry_clear_before_return () and the
21722 VSTR/VLDR instructions in the prologue and epilogue. */
21723 if (!TARGET_HAVE_FPCXT_CMSE)
21724 {
21725 /* Check if we have to clear the 'GE bits' which is only used if
21726 parallel add and subtraction instructions are available. */
21727 if (TARGET_INT_SIMD)
21728 snprintf (instr, sizeof (instr),
21729 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21730 else
21731 snprintf (instr, sizeof (instr),
21732 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21733
21734 output_asm_insn (instr, & operand);
21735 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21736 care of it. */
21737 if (TARGET_HARD_FLOAT)
21738 {
21739 /* Clear the cumulative exception-status bits (0-4,7) and
21740 the condition code bits (28-31) of the FPSCR. We need
21741 to remember to clear the first scratch register used
21742 (IP) and save and restore the second (r4).
21743
21744 Important note: the length of the
21745 thumb2_cmse_entry_return insn pattern must account for
21746 the size of the below instructions. */
21747 output_asm_insn ("push\t{%|r4}", & operand);
21748 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21749 output_asm_insn ("movw\t%|r4, #65376", & operand);
21750 output_asm_insn ("movt\t%|r4, #4095", & operand);
21751 output_asm_insn ("and\t%|ip, %|r4", & operand);
21752 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21753 output_asm_insn ("pop\t{%|r4}", & operand);
21754 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21755 }
21756 }
21757 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21758 }
21759 /* Use bx if it's available. */
21760 else if (arm_arch5t || arm_arch4t)
21761 sprintf (instr, "bx%s\t%%|lr", conditional);
21762 else
21763 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21764 break;
21765 }
21766
21767 output_asm_insn (instr, & operand);
21768 }
21769
21770 return "";
21771 }
21772
21773 /* Output in FILE asm statements needed to declare the NAME of the function
21774 defined by its DECL node. */
21775
21776 void
21777 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21778 {
21779 size_t cmse_name_len;
21780 char *cmse_name = 0;
21781 char cmse_prefix[] = "__acle_se_";
21782
21783 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21784 extra function label for each function with the 'cmse_nonsecure_entry'
21785 attribute. This extra function label should be prepended with
21786 '__acle_se_', telling the linker that it needs to create secure gateway
21787 veneers for this function. */
21788 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21789 DECL_ATTRIBUTES (decl)))
21790 {
21791 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21792 cmse_name = XALLOCAVEC (char, cmse_name_len);
21793 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21794 targetm.asm_out.globalize_label (file, cmse_name);
21795
21796 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21797 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21798 }
21799
21800 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21801 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21802 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21803 ASM_OUTPUT_LABEL (file, name);
21804
21805 if (cmse_name)
21806 ASM_OUTPUT_LABEL (file, cmse_name);
21807
21808 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21809 }
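/* As a sketch of the effect described above, a function "foo" (name
   hypothetical) compiled with CMSE and marked cmse_nonsecure_entry is
   emitted with an extra global alias, roughly:

	.global	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
   foo:
   __acle_se_foo:

   so that the linker can create the secure gateway veneer for it.  */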
21810
21811 /* Write the function name into the code section, directly preceding
21812 the function prologue.
21813
21814 Code will be output similar to this:
21815 t0
21816 .ascii "arm_poke_function_name", 0
21817 .align
21818 t1
21819 .word 0xff000000 + (t1 - t0)
21820 arm_poke_function_name
21821 mov ip, sp
21822 stmfd sp!, {fp, ip, lr, pc}
21823 sub fp, ip, #4
21824
21825 When performing a stack backtrace, code can inspect the value
21826 of 'pc' stored at 'fp' + 0. If the trace function then looks
21827 at location pc - 12 and the top 8 bits are set, then we know
21828 that there is a function name embedded immediately preceding this
21829 location and has length ((pc[-3]) & 0xff000000).
21830
21831 We assume that pc is declared as a pointer to an unsigned long.
21832
21833 It is of no benefit to output the function name if we are assembling
21834 a leaf function. These function types will not contain a stack
21835 backtrace structure, therefore it is not possible to determine the
21836 function name. */
21837 void
21838 arm_poke_function_name (FILE *stream, const char *name)
21839 {
21840 unsigned long alignlength;
21841 unsigned long length;
21842 rtx x;
21843
21844 length = strlen (name) + 1;
21845 alignlength = ROUND_UP_WORD (length);
21846
21847 ASM_OUTPUT_ASCII (stream, name, length);
21848 ASM_OUTPUT_ALIGN (stream, 2);
21849 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21850 assemble_aligned_integer (UNITS_PER_WORD, x);
21851 }
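/* For illustration only (not part of the compiler): a backtracer walking
   the frame described above could recover the embedded name roughly as

	unsigned long word = pc[-3];
	if ((word & 0xff000000) == 0xff000000)
	  {
	    unsigned long len = word & 0x00ffffff;	/* padded length */
	    const char *name = (const char *) &pc[-3] - len;
	  }

   where pc is the saved program counter taken from the backtrace
   structure.  */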
21852
21853 /* Place some comments into the assembler stream
21854 describing the current function. */
21855 static void
21856 arm_output_function_prologue (FILE *f)
21857 {
21858 unsigned long func_type;
21859
21860 /* Sanity check. */
21861 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21862
21863 func_type = arm_current_func_type ();
21864
21865 switch ((int) ARM_FUNC_TYPE (func_type))
21866 {
21867 default:
21868 case ARM_FT_NORMAL:
21869 break;
21870 case ARM_FT_INTERWORKED:
21871 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21872 break;
21873 case ARM_FT_ISR:
21874 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21875 break;
21876 case ARM_FT_FIQ:
21877 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21878 break;
21879 case ARM_FT_EXCEPTION:
21880 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21881 break;
21882 }
21883
21884 if (IS_NAKED (func_type))
21885 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21886
21887 if (IS_VOLATILE (func_type))
21888 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21889
21890 if (IS_NESTED (func_type))
21891 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21892 if (IS_STACKALIGN (func_type))
21893 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21894 if (IS_CMSE_ENTRY (func_type))
21895 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21896
21897 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21898 (HOST_WIDE_INT) crtl->args.size,
21899 crtl->args.pretend_args_size,
21900 (HOST_WIDE_INT) get_frame_size ());
21901
21902 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21903 frame_pointer_needed,
21904 cfun->machine->uses_anonymous_args);
21905
21906 if (cfun->machine->lr_save_eliminated)
21907 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21908
21909 if (crtl->calls_eh_return)
21910 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21911
21912 }
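/* For a plain function the comments produced above typically look like:

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.

   (values are illustrative; the exact lines depend on the function).  */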
21913
21914 static void
21915 arm_output_function_epilogue (FILE *)
21916 {
21917 arm_stack_offsets *offsets;
21918
21919 if (TARGET_THUMB1)
21920 {
21921 int regno;
21922
21923 /* Emit any call-via-reg trampolines that are needed for v4t support
21924 of call_reg and call_value_reg type insns. */
21925 for (regno = 0; regno < LR_REGNUM; regno++)
21926 {
21927 rtx label = cfun->machine->call_via[regno];
21928
21929 if (label != NULL)
21930 {
21931 switch_to_section (function_section (current_function_decl));
21932 targetm.asm_out.internal_label (asm_out_file, "L",
21933 CODE_LABEL_NUMBER (label));
21934 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21935 }
21936 }
21937
21938 /* ??? Probably not safe to set this here, since it assumes that a
21939 function will be emitted as assembly immediately after we generate
21940 RTL for it. This does not happen for inline functions. */
21941 cfun->machine->return_used_this_function = 0;
21942 }
21943 else /* TARGET_32BIT */
21944 {
21945 /* We need to take into account any stack-frame rounding. */
21946 offsets = arm_get_frame_offsets ();
21947
21948 gcc_assert (!use_return_insn (FALSE, NULL)
21949 || (cfun->machine->return_used_this_function != 0)
21950 || offsets->saved_regs == offsets->outgoing_args
21951 || frame_pointer_needed);
21952 }
21953 }
21954
21955 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21956 STR and STRD. If an even number of registers is being pushed, one
21957 or more STRD patterns are created, one for each register pair. If an
21958 odd number of registers is pushed, emit an initial STR followed by
21959 as many STRD instructions as are needed. This works best when the
21960 stack is initially 64-bit aligned (the normal case), since it
21961 ensures that each STRD is also 64-bit aligned. */
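/* As a hedged illustration of the scheme above, pushing the (hypothetical)
   odd set {r4, r5, r6} would be emitted as something like

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the single STR performs the whole stack allocation and the STRD
   then lands on a doubleword-aligned address.  */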
21962 static void
21963 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21964 {
21965 int num_regs = 0;
21966 int i;
21967 int regno;
21968 rtx par = NULL_RTX;
21969 rtx dwarf = NULL_RTX;
21970 rtx tmp;
21971 bool first = true;
21972
21973 num_regs = bit_count (saved_regs_mask);
21974
21975 /* Must be at least one register to save, and can't save SP or PC. */
21976 gcc_assert (num_regs > 0 && num_regs <= 14);
21977 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21978 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21979
21980 /* Create sequence for DWARF info. All the frame-related data for
21981 debugging is held in this wrapper. */
21982 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21983
21984 /* Describe the stack adjustment. */
21985 tmp = gen_rtx_SET (stack_pointer_rtx,
21986 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21987 RTX_FRAME_RELATED_P (tmp) = 1;
21988 XVECEXP (dwarf, 0, 0) = tmp;
21989
21990 /* Find the first register. */
21991 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21992 ;
21993
21994 i = 0;
21995
21996 /* If there's an odd number of registers to push, start off by
21997 pushing a single register. This ensures that subsequent strd
21998 operations are dword aligned (assuming that SP was originally
21999 64-bit aligned). */
22000 if ((num_regs & 1) != 0)
22001 {
22002 rtx reg, mem, insn;
22003
22004 reg = gen_rtx_REG (SImode, regno);
22005 if (num_regs == 1)
22006 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22007 stack_pointer_rtx));
22008 else
22009 mem = gen_frame_mem (Pmode,
22010 gen_rtx_PRE_MODIFY
22011 (Pmode, stack_pointer_rtx,
22012 plus_constant (Pmode, stack_pointer_rtx,
22013 -4 * num_regs)));
22014
22015 tmp = gen_rtx_SET (mem, reg);
22016 RTX_FRAME_RELATED_P (tmp) = 1;
22017 insn = emit_insn (tmp);
22018 RTX_FRAME_RELATED_P (insn) = 1;
22019 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22020 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22021 RTX_FRAME_RELATED_P (tmp) = 1;
22022 i++;
22023 regno++;
22024 XVECEXP (dwarf, 0, i) = tmp;
22025 first = false;
22026 }
22027
22028 while (i < num_regs)
22029 if (saved_regs_mask & (1 << regno))
22030 {
22031 rtx reg1, reg2, mem1, mem2;
22032 rtx tmp0, tmp1, tmp2;
22033 int regno2;
22034
22035 /* Find the register to pair with this one. */
22036 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22037 regno2++)
22038 ;
22039
22040 reg1 = gen_rtx_REG (SImode, regno);
22041 reg2 = gen_rtx_REG (SImode, regno2);
22042
22043 if (first)
22044 {
22045 rtx insn;
22046
22047 first = false;
22048 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22049 stack_pointer_rtx,
22050 -4 * num_regs));
22051 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22052 stack_pointer_rtx,
22053 -4 * (num_regs - 1)));
22054 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22055 plus_constant (Pmode, stack_pointer_rtx,
22056 -4 * (num_regs)));
22057 tmp1 = gen_rtx_SET (mem1, reg1);
22058 tmp2 = gen_rtx_SET (mem2, reg2);
22059 RTX_FRAME_RELATED_P (tmp0) = 1;
22060 RTX_FRAME_RELATED_P (tmp1) = 1;
22061 RTX_FRAME_RELATED_P (tmp2) = 1;
22062 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22063 XVECEXP (par, 0, 0) = tmp0;
22064 XVECEXP (par, 0, 1) = tmp1;
22065 XVECEXP (par, 0, 2) = tmp2;
22066 insn = emit_insn (par);
22067 RTX_FRAME_RELATED_P (insn) = 1;
22068 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22069 }
22070 else
22071 {
22072 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22073 stack_pointer_rtx,
22074 4 * i));
22075 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22076 stack_pointer_rtx,
22077 4 * (i + 1)));
22078 tmp1 = gen_rtx_SET (mem1, reg1);
22079 tmp2 = gen_rtx_SET (mem2, reg2);
22080 RTX_FRAME_RELATED_P (tmp1) = 1;
22081 RTX_FRAME_RELATED_P (tmp2) = 1;
22082 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22083 XVECEXP (par, 0, 0) = tmp1;
22084 XVECEXP (par, 0, 1) = tmp2;
22085 emit_insn (par);
22086 }
22087
22088 /* Create unwind information. This is an approximation. */
22089 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22090 plus_constant (Pmode,
22091 stack_pointer_rtx,
22092 4 * i)),
22093 reg1);
22094 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22095 plus_constant (Pmode,
22096 stack_pointer_rtx,
22097 4 * (i + 1))),
22098 reg2);
22099
22100 RTX_FRAME_RELATED_P (tmp1) = 1;
22101 RTX_FRAME_RELATED_P (tmp2) = 1;
22102 XVECEXP (dwarf, 0, i + 1) = tmp1;
22103 XVECEXP (dwarf, 0, i + 2) = tmp2;
22104 i += 2;
22105 regno = regno2 + 1;
22106 }
22107 else
22108 regno++;
22109
22110 return;
22111 }
22112
22113 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22114 whenever possible, otherwise it emits single-word stores. The first store
22115 also allocates stack space for all saved registers, using pre-indexed
22116 addressing with writeback. All other stores use offset addressing. If no STRD
22117 can be emitted, this function emits a sequence of single-word stores,
22118 and not an STM as before, because single-word stores provide more
22119 scheduling freedom and can be turned into an STM by peephole optimizations. */
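/* A hedged example of the above for the (hypothetical) mask {r4, r5, r7}:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   The ARM-mode STRD needs the consecutive even/odd pair r4/r5; r7 has no
   partner in the mask and therefore falls back to a single STR.  */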
22120 static void
22121 arm_emit_strd_push (unsigned long saved_regs_mask)
22122 {
22123 int num_regs = 0;
22124 int i, j, dwarf_index = 0;
22125 int offset = 0;
22126 rtx dwarf = NULL_RTX;
22127 rtx insn = NULL_RTX;
22128 rtx tmp, mem;
22129
22130 /* TODO: More efficient code can be emitted by changing the
22131 layout, e.g., first push all pairs that can use STRD to keep the
22132 stack aligned, and then push all other registers. */
22133 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22134 if (saved_regs_mask & (1 << i))
22135 num_regs++;
22136
22137 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22138 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22139 gcc_assert (num_regs > 0);
22140
22141 /* Create sequence for DWARF info. */
22142 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22143
22144 /* For dwarf info, we generate explicit stack update. */
22145 tmp = gen_rtx_SET (stack_pointer_rtx,
22146 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22147 RTX_FRAME_RELATED_P (tmp) = 1;
22148 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22149
22150 /* Save registers. */
22151 offset = - 4 * num_regs;
22152 j = 0;
22153 while (j <= LAST_ARM_REGNUM)
22154 if (saved_regs_mask & (1 << j))
22155 {
22156 if ((j % 2 == 0)
22157 && (saved_regs_mask & (1 << (j + 1))))
22158 {
22159 /* Current register and next register form a register pair for
22160 which STRD can be generated. */
22161 if (offset < 0)
22162 {
22163 /* Allocate stack space for all saved registers. */
22164 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22165 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22166 mem = gen_frame_mem (DImode, tmp);
22167 offset = 0;
22168 }
22169 else if (offset > 0)
22170 mem = gen_frame_mem (DImode,
22171 plus_constant (Pmode,
22172 stack_pointer_rtx,
22173 offset));
22174 else
22175 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22176
22177 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22178 RTX_FRAME_RELATED_P (tmp) = 1;
22179 tmp = emit_insn (tmp);
22180
22181 /* Record the first store insn. */
22182 if (dwarf_index == 1)
22183 insn = tmp;
22184
22185 /* Generate dwarf info. */
22186 mem = gen_frame_mem (SImode,
22187 plus_constant (Pmode,
22188 stack_pointer_rtx,
22189 offset));
22190 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22191 RTX_FRAME_RELATED_P (tmp) = 1;
22192 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22193
22194 mem = gen_frame_mem (SImode,
22195 plus_constant (Pmode,
22196 stack_pointer_rtx,
22197 offset + 4));
22198 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22199 RTX_FRAME_RELATED_P (tmp) = 1;
22200 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22201
22202 offset += 8;
22203 j += 2;
22204 }
22205 else
22206 {
22207 /* Emit a single word store. */
22208 if (offset < 0)
22209 {
22210 /* Allocate stack space for all saved registers. */
22211 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22212 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22213 mem = gen_frame_mem (SImode, tmp);
22214 offset = 0;
22215 }
22216 else if (offset > 0)
22217 mem = gen_frame_mem (SImode,
22218 plus_constant (Pmode,
22219 stack_pointer_rtx,
22220 offset));
22221 else
22222 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22223
22224 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22225 RTX_FRAME_RELATED_P (tmp) = 1;
22226 tmp = emit_insn (tmp);
22227
22228 /* Record the first store insn. */
22229 if (dwarf_index == 1)
22230 insn = tmp;
22231
22232 /* Generate dwarf info. */
22233 mem = gen_frame_mem (SImode,
22234 plus_constant(Pmode,
22235 stack_pointer_rtx,
22236 offset));
22237 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22238 RTX_FRAME_RELATED_P (tmp) = 1;
22239 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22240
22241 offset += 4;
22242 j += 1;
22243 }
22244 }
22245 else
22246 j++;
22247
22248 /* Attach dwarf info to the first insn we generate. */
22249 gcc_assert (insn != NULL_RTX);
22250 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22251 RTX_FRAME_RELATED_P (insn) = 1;
22252 }
22253
22254 /* Generate and emit an insn that we will recognize as a push_multi.
22255 Unfortunately, since this insn does not reflect very well the actual
22256 semantics of the operation, we need to annotate the insn for the benefit
22257 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22258 MASK for registers that should be annotated for DWARF2 frame unwind
22259 information. */
22260 static rtx
22261 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22262 {
22263 int num_regs = 0;
22264 int num_dwarf_regs = 0;
22265 int i, j;
22266 rtx par;
22267 rtx dwarf;
22268 int dwarf_par_index;
22269 rtx tmp, reg;
22270
22271 /* We don't record the PC in the dwarf frame information. */
22272 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22273
22274 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22275 {
22276 if (mask & (1 << i))
22277 num_regs++;
22278 if (dwarf_regs_mask & (1 << i))
22279 num_dwarf_regs++;
22280 }
22281
22282 gcc_assert (num_regs && num_regs <= 16);
22283 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22284
22285 /* For the body of the insn we are going to generate an UNSPEC in
22286 parallel with several USEs. This allows the insn to be recognized
22287 by the push_multi pattern in the arm.md file.
22288
22289 The body of the insn looks something like this:
22290
22291 (parallel [
22292 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22293 (const_int:SI <num>)))
22294 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22295 (use (reg:SI XX))
22296 (use (reg:SI YY))
22297 ...
22298 ])
22299
22300 For the frame note however, we try to be more explicit and actually
22301 show each register being stored into the stack frame, plus a (single)
22302 decrement of the stack pointer. We do it this way in order to be
22303 friendly to the stack unwinding code, which only wants to see a single
22304 stack decrement per instruction. The RTL we generate for the note looks
22305 something like this:
22306
22307 (sequence [
22308 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22309 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22310 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22311 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22312 ...
22313 ])
22314
22315 FIXME: In an ideal world the PRE_MODIFY would not exist and
22316 instead we'd have a parallel expression detailing all
22317 the stores to the various memory addresses so that debug
22318 information is more up-to-date. Remember however while writing
22319 this to take care of the constraints with the push instruction.
22320
22321 Note also that this has to be taken care of for the VFP registers.
22322
22323 For more see PR43399. */
22324
22325 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22326 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22327 dwarf_par_index = 1;
22328
22329 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22330 {
22331 if (mask & (1 << i))
22332 {
22333 /* NOTE: The dwarf code emitter handles reg-reg copies correctly; in the
22334 following example the reg-reg copy of SP to the IP register is handled
22335 through the .cfi_def_cfa_register directive and the .cfi_offset
22336 directive for the IP register is skipped by the dwarf code emitter.
22337 Example:
22338 mov ip, sp
22339 .cfi_def_cfa_register 12
22340 push {fp, ip, lr, pc}
22341 .cfi_offset 11, -16
22342 .cfi_offset 13, -12
22343 .cfi_offset 14, -8
22344
22345 The Arm-specific .save directive handling, however, differs from that
22346 of the dwarf code emitter and doesn't consider reg-reg copies while
22347 updating the register list. When PACBTI is enabled we manually
22348 update the .save directive register list to use "ra_auth_code"
22349 (pseudo register 143) instead of the IP register, as shown in the
22350 following pseudo code.
22351 Example:
22352 pacbti ip, lr, sp
22353 .cfi_register 143, 12
22354 push {r3, r7, ip, lr}
22355 .save {r3, r7, ra_auth_code, lr}
22356 */
22357 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22358 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22359 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22360
22361 XVECEXP (par, 0, 0)
22362 = gen_rtx_SET (gen_frame_mem
22363 (BLKmode,
22364 gen_rtx_PRE_MODIFY (Pmode,
22365 stack_pointer_rtx,
22366 plus_constant
22367 (Pmode, stack_pointer_rtx,
22368 -4 * num_regs))
22369 ),
22370 gen_rtx_UNSPEC (BLKmode,
22371 gen_rtvec (1, reg),
22372 UNSPEC_PUSH_MULT));
22373
22374 if (dwarf_regs_mask & (1 << i))
22375 {
22376 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22377 dwarf_reg);
22378 RTX_FRAME_RELATED_P (tmp) = 1;
22379 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22380 }
22381
22382 break;
22383 }
22384 }
22385
22386 for (j = 1, i++; j < num_regs; i++)
22387 {
22388 if (mask & (1 << i))
22389 {
22390 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22391 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22392 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22393
22394 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22395
22396 if (dwarf_regs_mask & (1 << i))
22397 {
22398 tmp
22399 = gen_rtx_SET (gen_frame_mem
22400 (SImode,
22401 plus_constant (Pmode, stack_pointer_rtx,
22402 4 * j)),
22403 dwarf_reg);
22404 RTX_FRAME_RELATED_P (tmp) = 1;
22405 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22406 }
22407
22408 j++;
22409 }
22410 }
22411
22412 par = emit_insn (par);
22413
22414 tmp = gen_rtx_SET (stack_pointer_rtx,
22415 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22416 RTX_FRAME_RELATED_P (tmp) = 1;
22417 XVECEXP (dwarf, 0, 0) = tmp;
22418
22419 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22420
22421 return par;
22422 }
22423
22424 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22425 SIZE is the offset to be adjusted.
22426 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22427 static void
22428 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22429 {
22430 rtx dwarf;
22431
22432 RTX_FRAME_RELATED_P (insn) = 1;
22433 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22434 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22435 }
22436
22437 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22438 SAVED_REGS_MASK shows which registers need to be restored.
22439
22440 Unfortunately, since this insn does not reflect very well the actual
22441 semantics of the operation, we need to annotate the insn for the benefit
22442 of DWARF2 frame unwind information. */
22443 static void
22444 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22445 {
22446 int num_regs = 0;
22447 int i, j;
22448 rtx par;
22449 rtx dwarf = NULL_RTX;
22450 rtx tmp, reg;
22451 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22452 int offset_adj;
22453 int emit_update;
22454
22455 offset_adj = return_in_pc ? 1 : 0;
22456 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22457 if (saved_regs_mask & (1 << i))
22458 num_regs++;
22459
22460 gcc_assert (num_regs && num_regs <= 16);
22461
22462 /* If SP is in reglist, then we don't emit SP update insn. */
22463 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22464
22465 /* The parallel needs to hold num_regs SETs
22466 and one SET for the stack update. */
22467 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22468
22469 if (return_in_pc)
22470 XVECEXP (par, 0, 0) = ret_rtx;
22471
22472 if (emit_update)
22473 {
22474 /* Increment the stack pointer, based on there being
22475 num_regs 4-byte registers to restore. */
22476 tmp = gen_rtx_SET (stack_pointer_rtx,
22477 plus_constant (Pmode,
22478 stack_pointer_rtx,
22479 4 * num_regs));
22480 RTX_FRAME_RELATED_P (tmp) = 1;
22481 XVECEXP (par, 0, offset_adj) = tmp;
22482 }
22483
22484 /* Now restore every reg, which may include PC. */
22485 for (j = 0, i = 0; j < num_regs; i++)
22486 if (saved_regs_mask & (1 << i))
22487 {
22488 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22489 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22490 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22491 if ((num_regs == 1) && emit_update && !return_in_pc)
22492 {
22493 /* Emit single load with writeback. */
22494 tmp = gen_frame_mem (SImode,
22495 gen_rtx_POST_INC (Pmode,
22496 stack_pointer_rtx));
22497 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22498 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22499 dwarf);
22500 return;
22501 }
22502
22503 tmp = gen_rtx_SET (reg,
22504 gen_frame_mem
22505 (SImode,
22506 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22507 RTX_FRAME_RELATED_P (tmp) = 1;
22508 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22509
22510 /* We need to maintain a sequence for DWARF info too. As dwarf info
22511 should not have PC, skip PC. */
22512 if (i != PC_REGNUM)
22513 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22514
22515 j++;
22516 }
22517
22518 if (return_in_pc)
22519 par = emit_jump_insn (par);
22520 else
22521 par = emit_insn (par);
22522
22523 REG_NOTES (par) = dwarf;
22524 if (!return_in_pc)
22525 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22526 stack_pointer_rtx, stack_pointer_rtx);
22527 }
22528
22529 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22530 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22531
22532 Unfortunately, since this insn does not reflect very well the actual
22533 semantics of the operation, we need to annotate the insn for the benefit
22534 of DWARF2 frame unwind information. */
22535 static void
22536 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22537 {
22538 int i, j;
22539 rtx par;
22540 rtx dwarf = NULL_RTX;
22541 rtx tmp, reg;
22542
22543 gcc_assert (num_regs && num_regs <= 32);
22544
22545 /* Workaround ARM10 VFPr1 bug. */
22546 if (num_regs == 2 && !arm_arch6)
22547 {
22548 if (first_reg == 15)
22549 first_reg--;
22550
22551 num_regs++;
22552 }
22553
22554 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22555 there could be up to 32 D-registers to restore.
22556 If there are more than 16 D-registers, make two recursive calls,
22557 each of which emits one pop_multi instruction. */
22558 if (num_regs > 16)
22559 {
22560 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22561 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22562 return;
22563 }
22564
22565 /* The parallel needs to hold num_regs SETs
22566 and one SET for the stack update. */
22567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22568
22569 /* Increment the stack pointer, based on there being
22570 num_regs 8-byte registers to restore. */
22571 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22572 RTX_FRAME_RELATED_P (tmp) = 1;
22573 XVECEXP (par, 0, 0) = tmp;
22574
22575 /* Now show every reg that will be restored, using a SET for each. */
22576 for (j = 0, i=first_reg; j < num_regs; i += 2)
22577 {
22578 reg = gen_rtx_REG (DFmode, i);
22579
22580 tmp = gen_rtx_SET (reg,
22581 gen_frame_mem
22582 (DFmode,
22583 plus_constant (Pmode, base_reg, 8 * j)));
22584 RTX_FRAME_RELATED_P (tmp) = 1;
22585 XVECEXP (par, 0, j + 1) = tmp;
22586
22587 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22588
22589 j++;
22590 }
22591
22592 par = emit_insn (par);
22593 REG_NOTES (par) = dwarf;
22594
22595 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
22596 if (REGNO (base_reg) == IP_REGNUM)
22597 {
22598 RTX_FRAME_RELATED_P (par) = 1;
22599 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22600 }
22601 else
22602 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22603 base_reg, base_reg);
22604 }
22605
22606 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22607 even number of registers is being popped, multiple LDRD patterns are created for
22608 all register pairs. If an odd number of registers is popped, the last register is
22609 loaded using an LDR pattern. */
22610 static void
22611 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22612 {
22613 int num_regs = 0;
22614 int i, j;
22615 rtx par = NULL_RTX;
22616 rtx dwarf = NULL_RTX;
22617 rtx tmp, reg, tmp1;
22618 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22619
22620 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22621 if (saved_regs_mask & (1 << i))
22622 num_regs++;
22623
22624 gcc_assert (num_regs && num_regs <= 16);
22625
22626 /* We cannot generate ldrd for PC, so reduce the count if PC is
22627 to be popped. If num_regs was even it now becomes odd,
22628 and we can generate a pop with PC. If num_regs was odd, it is
22629 now even, and an ldr with return can be generated for PC. */
22630 if (return_in_pc)
22631 num_regs--;
22632
22633 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22634
22635 /* Var j iterates over all the registers to find those set in
22636 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
22637 A PARALLEL RTX of register-pair is created here, so that pattern for
22638 LDRD can be matched. As PC is always last register to be popped, and
22639 we have already decremented num_regs if PC, we don't have to worry
22640 about PC in this loop. */
22641 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22642 if (saved_regs_mask & (1 << j))
22643 {
22644 /* Create RTX for memory load. */
22645 reg = gen_rtx_REG (SImode, j);
22646 tmp = gen_rtx_SET (reg,
22647 gen_frame_mem (SImode,
22648 plus_constant (Pmode,
22649 stack_pointer_rtx, 4 * i)));
22650 RTX_FRAME_RELATED_P (tmp) = 1;
22651
22652 if (i % 2 == 0)
22653 {
22654 /* When saved-register index (i) is even, the RTX to be emitted is
22655 yet to be created. Hence create it first. The LDRD pattern we
22656 are generating is :
22657 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22658 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22659 where target registers need not be consecutive. */
22660 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22661 dwarf = NULL_RTX;
22662 }
22663
22664 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22665 added as 0th element and if i is odd, reg_i is added as 1st element
22666 of LDRD pattern shown above. */
22667 XVECEXP (par, 0, (i % 2)) = tmp;
22668 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22669
22670 if ((i % 2) == 1)
22671 {
22672 /* When saved-register index (i) is odd, RTXs for both the registers
22673 to be loaded are generated in above given LDRD pattern, and the
22674 pattern can be emitted now. */
22675 par = emit_insn (par);
22676 REG_NOTES (par) = dwarf;
22677 RTX_FRAME_RELATED_P (par) = 1;
22678 }
22679
22680 i++;
22681 }
22682
22683 /* If the number of registers pushed is odd AND return_in_pc is false, OR
22684 the number of registers is even AND return_in_pc is true, the last register is
22685 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22686 then load it using LDR with post-increment. */
22687
22688 /* Increment the stack pointer, based on there being
22689 num_regs 4-byte registers to restore. */
22690 tmp = gen_rtx_SET (stack_pointer_rtx,
22691 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22692 RTX_FRAME_RELATED_P (tmp) = 1;
22693 tmp = emit_insn (tmp);
22694 if (!return_in_pc)
22695 {
22696 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22697 stack_pointer_rtx, stack_pointer_rtx);
22698 }
22699
22700 dwarf = NULL_RTX;
22701
22702 if (((num_regs % 2) == 1 && !return_in_pc)
22703 || ((num_regs % 2) == 0 && return_in_pc))
22704 {
22705 /* Scan for the single register to be popped. Skip until the saved
22706 register is found. */
22707 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22708
22709 /* Gen LDR with post increment here. */
22710 tmp1 = gen_rtx_MEM (SImode,
22711 gen_rtx_POST_INC (SImode,
22712 stack_pointer_rtx));
22713 set_mem_alias_set (tmp1, get_frame_alias_set ());
22714
22715 reg = gen_rtx_REG (SImode, j);
22716 tmp = gen_rtx_SET (reg, tmp1);
22717 RTX_FRAME_RELATED_P (tmp) = 1;
22718 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22719
22720 if (return_in_pc)
22721 {
22722 /* If return_in_pc, j must be PC_REGNUM. */
22723 gcc_assert (j == PC_REGNUM);
22724 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22725 XVECEXP (par, 0, 0) = ret_rtx;
22726 XVECEXP (par, 0, 1) = tmp;
22727 par = emit_jump_insn (par);
22728 }
22729 else
22730 {
22731 par = emit_insn (tmp);
22732 REG_NOTES (par) = dwarf;
22733 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22734 stack_pointer_rtx, stack_pointer_rtx);
22735 }
22736
22737 }
22738 else if ((num_regs % 2) == 1 && return_in_pc)
22739 {
22740 /* There are 2 registers to be popped. So, generate the pattern
22741 pop_multiple_with_stack_update_and_return to pop in PC. */
22742 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22743 }
22744
22745 return;
22746 }
22747
22748 /* LDRD in ARM mode needs consecutive registers as operands. This function
22749 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22750 offset addressing and then generates one separate stack update. This provides
22751 more scheduling freedom, compared to writeback on every load. However,
22752 if the function returns using load into PC directly
22753 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22754 before the last load. TODO: Add a peephole optimization to recognize
22755 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22756 peephole optimization to merge the load at stack-offset zero
22757 with the stack update instruction using load with writeback
22758 in post-index addressing mode. */
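/* A hedged example of the above for the (hypothetical) mask {r4, r5, r6}:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   All loads use offset addressing and a single SP adjustment follows; if
   PC were in the mask it would instead be loaded last, after the SP
   update, with "ldr pc, [sp], #4".  */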
22759 static void
22760 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22761 {
22762 int j = 0;
22763 int offset = 0;
22764 rtx par = NULL_RTX;
22765 rtx dwarf = NULL_RTX;
22766 rtx tmp, mem;
22767
22768 /* Restore saved registers. */
22769 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22770 j = 0;
22771 while (j <= LAST_ARM_REGNUM)
22772 if (saved_regs_mask & (1 << j))
22773 {
22774 if ((j % 2) == 0
22775 && (saved_regs_mask & (1 << (j + 1)))
22776 && (j + 1) != PC_REGNUM)
22777 {
22778 /* Current register and next register form a register pair for which
22779 LDRD can be generated. PC is always the last register popped, and
22780 we handle it separately. */
22781 if (offset > 0)
22782 mem = gen_frame_mem (DImode,
22783 plus_constant (Pmode,
22784 stack_pointer_rtx,
22785 offset));
22786 else
22787 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22788
22789 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22790 tmp = emit_insn (tmp);
22791 RTX_FRAME_RELATED_P (tmp) = 1;
22792
22793 /* Generate dwarf info. */
22794
22795 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22796 gen_rtx_REG (SImode, j),
22797 NULL_RTX);
22798 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22799 gen_rtx_REG (SImode, j + 1),
22800 dwarf);
22801
22802 REG_NOTES (tmp) = dwarf;
22803
22804 offset += 8;
22805 j += 2;
22806 }
22807 else if (j != PC_REGNUM)
22808 {
22809 /* Emit a single word load. */
22810 if (offset > 0)
22811 mem = gen_frame_mem (SImode,
22812 plus_constant (Pmode,
22813 stack_pointer_rtx,
22814 offset));
22815 else
22816 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22817
22818 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22819 tmp = emit_insn (tmp);
22820 RTX_FRAME_RELATED_P (tmp) = 1;
22821
22822 /* Generate dwarf info. */
22823 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22824 gen_rtx_REG (SImode, j),
22825 NULL_RTX);
22826
22827 offset += 4;
22828 j += 1;
22829 }
22830 else /* j == PC_REGNUM */
22831 j++;
22832 }
22833 else
22834 j++;
22835
22836 /* Update the stack. */
22837 if (offset > 0)
22838 {
22839 tmp = gen_rtx_SET (stack_pointer_rtx,
22840 plus_constant (Pmode,
22841 stack_pointer_rtx,
22842 offset));
22843 tmp = emit_insn (tmp);
22844 arm_add_cfa_adjust_cfa_note (tmp, offset,
22845 stack_pointer_rtx, stack_pointer_rtx);
22846 offset = 0;
22847 }
22848
22849 if (saved_regs_mask & (1 << PC_REGNUM))
22850 {
22851 /* Only PC is to be popped. */
22852 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22853 XVECEXP (par, 0, 0) = ret_rtx;
22854 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22855 gen_frame_mem (SImode,
22856 gen_rtx_POST_INC (SImode,
22857 stack_pointer_rtx)));
22858 RTX_FRAME_RELATED_P (tmp) = 1;
22859 XVECEXP (par, 0, 1) = tmp;
22860 par = emit_jump_insn (par);
22861
22862 /* Generate dwarf info. */
22863 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22864 gen_rtx_REG (SImode, PC_REGNUM),
22865 NULL_RTX);
22866 REG_NOTES (par) = dwarf;
22867 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22868 stack_pointer_rtx, stack_pointer_rtx);
22869 }
22870 }
22871
22872 /* Calculate the size of the return value that is passed in registers. */
22873 static unsigned
22874 arm_size_return_regs (void)
22875 {
22876 machine_mode mode;
22877
22878 if (crtl->return_rtx != 0)
22879 mode = GET_MODE (crtl->return_rtx);
22880 else
22881 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22882
22883 return GET_MODE_SIZE (mode);
22884 }
22885
22886 /* Return true if the current function needs to save/restore LR. */
22887 static bool
22888 thumb_force_lr_save (void)
22889 {
22890 return !cfun->machine->lr_save_eliminated
22891 && (!crtl->is_leaf
22892 || thumb_far_jump_used_p ()
22893 || df_regs_ever_live_p (LR_REGNUM));
22894 }
22895
22896 /* Return true if CALL is an indirect tail call, in which case
22897 we do not know whether r3 will be available, since the call
22898 address may live in any register, including r3. */
22899 static bool
22900 is_indirect_tailcall_p (rtx call)
22901 {
22902 rtx pat = PATTERN (call);
22903
22904 /* Indirect tail call. */
22905 pat = XVECEXP (pat, 0, 0);
22906 if (GET_CODE (pat) == SET)
22907 pat = SET_SRC (pat);
22908
22909 pat = XEXP (XEXP (pat, 0), 0);
22910 return REG_P (pat);
22911 }
22912
22913 /* Return true if r3 is used by any of the tail call insns in the
22914 current function. */
22915 static bool
22916 any_sibcall_could_use_r3 (void)
22917 {
22918 edge_iterator ei;
22919 edge e;
22920
22921 if (!crtl->tail_call_emit)
22922 return false;
22923 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22924 if (e->flags & EDGE_SIBCALL)
22925 {
22926 rtx_insn *call = BB_END (e->src);
22927 if (!CALL_P (call))
22928 call = prev_nonnote_nondebug_insn (call);
22929 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22930 if (find_regno_fusage (call, USE, 3)
22931 || is_indirect_tailcall_p (call))
22932 return true;
22933 }
22934 return false;
22935 }
22936
22937
22938 /* Compute the distance from register FROM to register TO.
22939 These can be the arg pointer (26), the soft frame pointer (25),
22940 the stack pointer (13) or the hard frame pointer (11).
22941 In thumb mode r7 is used as the soft frame pointer, if needed.
22942 Typical stack layout looks like this:
22943
22944 old stack pointer -> | |
22945 ----
22946 | | \
22947 | | saved arguments for
22948 | | vararg functions
22949 | | /
22950 --
22951 hard FP & arg pointer -> | | \
22952 | | stack
22953 | | frame
22954 | | /
22955 --
22956 | | \
22957 | | call saved
22958 | | registers
22959 soft frame pointer -> | | /
22960 --
22961 | | \
22962 | | local
22963 | | variables
22964 locals base pointer -> | | /
22965 --
22966 | | \
22967 | | outgoing
22968 | | arguments
22969 current stack pointer -> | | /
22970 --
22971
22972 For a given function some or all of these stack components
22973 may not be needed, giving rise to the possibility of
22974 eliminating some of the registers.
22975
22976 The values returned by this function must reflect the behavior
22977 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22978
22979 The sign of the number returned reflects the direction of stack
22980 growth, so the values are positive for all eliminations except
22981 from the soft frame pointer to the hard frame pointer.
22982
22983 SFP may point just inside the local variables block to ensure correct
22984 alignment. */
22985
22986
22987 /* Return cached stack offsets. */
22988
22989 static arm_stack_offsets *
22990 arm_get_frame_offsets (void)
22991 {
22992 struct arm_stack_offsets *offsets;
22993
22994 offsets = &cfun->machine->stack_offsets;
22995
22996 return offsets;
22997 }
22998
22999
23000 /* Calculate stack offsets. These are used to calculate register elimination
23001 offsets and in prologue/epilogue code. Also calculates which registers
23002 should be saved. */
23003
23004 static void
23005 arm_compute_frame_layout (void)
23006 {
23007 struct arm_stack_offsets *offsets;
23008 unsigned long func_type;
23009 int saved;
23010 int core_saved;
23011 HOST_WIDE_INT frame_size;
23012 int i;
23013
23014 offsets = &cfun->machine->stack_offsets;
23015
23016 /* Initially this is the size of the local variables. It will be translated
23017 into an offset once we have determined the size of preceding data. */
23018 frame_size = ROUND_UP_WORD (get_frame_size ());
23019
23020 /* Space for variadic functions. */
23021 offsets->saved_args = crtl->args.pretend_args_size;
23022
23023 /* In Thumb mode this is incorrect, but never used. */
23024 offsets->frame
23025 = (offsets->saved_args
23026 + arm_compute_static_chain_stack_bytes ()
23027 + (frame_pointer_needed ? 4 : 0));
23028
23029 if (TARGET_32BIT)
23030 {
23031 unsigned int regno;
23032
23033 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23034 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23035 saved = core_saved;
23036
23037 /* We know that SP will be doubleword aligned on entry, and we must
23038 preserve that condition at any subroutine call. We also require the
23039 soft frame pointer to be doubleword aligned. */
23040
23041 if (TARGET_REALLY_IWMMXT)
23042 {
23043 /* Check for the call-saved iWMMXt registers. */
23044 for (regno = FIRST_IWMMXT_REGNUM;
23045 regno <= LAST_IWMMXT_REGNUM;
23046 regno++)
23047 if (reg_needs_saving_p (regno))
23048 saved += 8;
23049 }
23050
23051 func_type = arm_current_func_type ();
23052 /* Space for saved VFP registers. */
23053 if (! IS_VOLATILE (func_type)
23054 && TARGET_VFP_BASE)
23055 saved += arm_get_vfp_saved_size ();
23056
23057 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23058 nonsecure entry functions with VSTR/VLDR. */
23059 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23060 saved += 4;
23061 }
23062 else /* TARGET_THUMB1 */
23063 {
23064 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23065 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23066 saved = core_saved;
23067 if (TARGET_BACKTRACE)
23068 saved += 16;
23069 }
23070
23071 /* Saved registers include the stack frame. */
23072 offsets->saved_regs
23073 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23074 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23075
23076 /* A leaf function does not need any stack alignment if it has nothing
23077 on the stack. */
23078 if (crtl->is_leaf && frame_size == 0
23079 /* However if it calls alloca(), we have a dynamically allocated
23080 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23081 && ! cfun->calls_alloca)
23082 {
23083 offsets->outgoing_args = offsets->soft_frame;
23084 offsets->locals_base = offsets->soft_frame;
23085 return;
23086 }
23087
23088 /* Ensure SFP has the correct alignment. */
23089 if (ARM_DOUBLEWORD_ALIGN
23090 && (offsets->soft_frame & 7))
23091 {
23092 offsets->soft_frame += 4;
23093 /* Try to align stack by pushing an extra reg. Don't bother doing this
23094 when there is a stack frame as the alignment will be rolled into
23095 the normal stack adjustment. */
23096 if (frame_size + crtl->outgoing_args_size == 0)
23097 {
23098 int reg = -1;
23099
23100 /* Register r3 is caller-saved. Normally it does not need to be
23101 saved on entry by the prologue. However if we choose to save
23102 it for padding then we may confuse the compiler into thinking
23103 a prologue sequence is required when in fact it is not. This
23104 will occur when shrink-wrapping if r3 is used as a scratch
23105 register and there are no other callee-saved writes.
23106
23107 This situation can be avoided when other callee-saved registers
23108 are available and r3 is not mandatory, by choosing a callee-saved
23109 register for the padding instead.
23110 bool prefer_callee_reg_p = false;
23111
23112 /* If it is safe to use r3, then do so. This sometimes
23113 generates better code on Thumb-2 by avoiding the need to
23114 use 32-bit push/pop instructions. */
23115 if (! any_sibcall_could_use_r3 ()
23116 && arm_size_return_regs () <= 12
23117 && (offsets->saved_regs_mask & (1 << 3)) == 0
23118 && (TARGET_THUMB2
23119 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23120 {
23121 reg = 3;
23122 if (!TARGET_THUMB2)
23123 prefer_callee_reg_p = true;
23124 }
23125 if (reg == -1
23126 || prefer_callee_reg_p)
23127 {
23128 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23129 {
23130 /* Avoid fixed registers; they may be changed at
23131 arbitrary times so it's unsafe to restore them
23132 during the epilogue. */
23133 if (!fixed_regs[i]
23134 && (offsets->saved_regs_mask & (1 << i)) == 0)
23135 {
23136 reg = i;
23137 break;
23138 }
23139 }
23140 }
23141
23142 if (reg != -1)
23143 {
23144 offsets->saved_regs += 4;
23145 offsets->saved_regs_mask |= (1 << reg);
23146 }
23147 }
23148 }
23149
23150 offsets->locals_base = offsets->soft_frame + frame_size;
23151 offsets->outgoing_args = (offsets->locals_base
23152 + crtl->outgoing_args_size);
23153
23154 if (ARM_DOUBLEWORD_ALIGN)
23155 {
23156 /* Ensure SP remains doubleword aligned. */
23157 if (offsets->outgoing_args & 7)
23158 offsets->outgoing_args += 4;
23159 gcc_assert (!(offsets->outgoing_args & 7));
23160 }
23161 }
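/* As a rough worked example of the offsets computed above (all values are
   hypothetical), an ARM function with 16 bytes of pretend arguments, no
   static chain, four core registers to save, 24 bytes of locals and 8
   bytes of outgoing arguments ends up with

     saved_args	   = 16
     frame	   = 16 (+4 if a frame pointer is needed)
     saved_regs	   = 16 + 4 * 4 = 32
     soft_frame	   = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
     locals_base   = soft_frame + 24
     outgoing_args = locals_base + 8, rounded up so that SP stays
		     doubleword aligned when ARM_DOUBLEWORD_ALIGN is set.  */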
23162
23163
23164 /* Calculate the relative offsets for the different stack pointers. Positive
23165 offsets are in the direction of stack growth. */
23166
23167 HOST_WIDE_INT
23168 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23169 {
23170 arm_stack_offsets *offsets;
23171
23172 offsets = arm_get_frame_offsets ();
23173
23174 /* OK, now we have enough information to compute the distances.
23175 There must be an entry in these switch tables for each pair
23176 of registers in ELIMINABLE_REGS, even if some of the entries
23177 seem to be redundant or useless. */
23178 switch (from)
23179 {
23180 case ARG_POINTER_REGNUM:
23181 switch (to)
23182 {
23183 case THUMB_HARD_FRAME_POINTER_REGNUM:
23184 return 0;
23185
23186 case FRAME_POINTER_REGNUM:
23187 /* This is the reverse of the soft frame pointer
23188 to hard frame pointer elimination below. */
23189 return offsets->soft_frame - offsets->saved_args;
23190
23191 case ARM_HARD_FRAME_POINTER_REGNUM:
23192 /* This is only non-zero in the case where the static chain register
23193 is stored above the frame. */
23194 return offsets->frame - offsets->saved_args - 4;
23195
23196 case STACK_POINTER_REGNUM:
23197 /* If nothing has been pushed on the stack at all
23198 then this will return -4. This *is* correct! */
23199 return offsets->outgoing_args - (offsets->saved_args + 4);
23200
23201 default:
23202 gcc_unreachable ();
23203 }
23204 gcc_unreachable ();
23205
23206 case FRAME_POINTER_REGNUM:
23207 switch (to)
23208 {
23209 case THUMB_HARD_FRAME_POINTER_REGNUM:
23210 return 0;
23211
23212 case ARM_HARD_FRAME_POINTER_REGNUM:
23213 /* The hard frame pointer points to the top entry in the
23214 stack frame. The soft frame pointer points to the bottom entry
23215 in the stack frame. If there is no stack frame at all,
23216 then they are identical. */
23217
23218 return offsets->frame - offsets->soft_frame;
23219
23220 case STACK_POINTER_REGNUM:
23221 return offsets->outgoing_args - offsets->soft_frame;
23222
23223 default:
23224 gcc_unreachable ();
23225 }
23226 gcc_unreachable ();
23227
23228 default:
23229 /* You cannot eliminate from the stack pointer.
23230 In theory you could eliminate from the hard frame
23231 pointer to the stack pointer, but this will never
23232 happen, since if a stack frame is not needed the
23233 hard frame pointer will never be used. */
23234 gcc_unreachable ();
23235 }
23236 }
23237
23238 /* Given FROM and TO register numbers, say whether this elimination is
23239 allowed. Frame pointer elimination is automatically handled.
23240
23241 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23242 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23243 pointer, we must eliminate FRAME_POINTER_REGNUM into
23244 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23245 ARG_POINTER_REGNUM. */
23246
23247 bool
23248 arm_can_eliminate (const int from, const int to)
23249 {
23250 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23251 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23252 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23253 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23254 true);
23255 }
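/* Summarizing the chained conditions above:

     ARG_POINTER -> FRAME_POINTER		never allowed
     any	 -> STACK_POINTER		only if no frame pointer is needed
     any	 -> ARM_HARD_FRAME_POINTER	only when not compiling for Thumb
     any	 -> THUMB_HARD_FRAME_POINTER	only when not compiling for ARM
     all other combinations			allowed.  */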
23256
23257 /* Emit RTL to save coprocessor registers on function entry. Returns the
23258 number of bytes pushed. */
23259
23260 static int
23261 arm_save_coproc_regs(void)
23262 {
23263 int saved_size = 0;
23264 unsigned reg;
23265 unsigned start_reg;
23266 rtx insn;
23267
23268 if (TARGET_REALLY_IWMMXT)
23269 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23270 if (reg_needs_saving_p (reg))
23271 {
23272 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23273 insn = gen_rtx_MEM (V2SImode, insn);
23274 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23275 RTX_FRAME_RELATED_P (insn) = 1;
23276 saved_size += 8;
23277 }
23278
23279 if (TARGET_VFP_BASE)
23280 {
23281 start_reg = FIRST_VFP_REGNUM;
23282
23283 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23284 {
23285 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23286 {
23287 if (start_reg != reg)
23288 saved_size += vfp_emit_fstmd (start_reg,
23289 (reg - start_reg) / 2);
23290 start_reg = reg + 2;
23291 }
23292 }
23293 if (start_reg != reg)
23294 saved_size += vfp_emit_fstmd (start_reg,
23295 (reg - start_reg) / 2);
23296 }
23297 return saved_size;
23298 }
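/* Note that the VFP loop above batches contiguous runs of live D registers
   into single store-multiple instructions: for instance (purely
   illustrative), if only d8-d11 need saving, a single vfp_emit_fstmd call
   covering those four registers is emitted rather than four separate
   stores.  */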
23299
23300
23301 /* Set the Thumb frame pointer from the stack pointer. */
23302
23303 static void
23304 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23305 {
23306 HOST_WIDE_INT amount;
23307 rtx insn, dwarf;
23308
23309 amount = offsets->outgoing_args - offsets->locals_base;
23310 if (amount < 1024)
23311 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23312 stack_pointer_rtx, GEN_INT (amount)));
23313 else
23314 {
23315 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23316 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23317 expects the first two operands to be the same. */
23318 if (TARGET_THUMB2)
23319 {
23320 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23321 stack_pointer_rtx,
23322 hard_frame_pointer_rtx));
23323 }
23324 else
23325 {
23326 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23327 hard_frame_pointer_rtx,
23328 stack_pointer_rtx));
23329 }
23330 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23331 plus_constant (Pmode, stack_pointer_rtx, amount));
23332 RTX_FRAME_RELATED_P (dwarf) = 1;
23333 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23334 }
23335
23336 RTX_FRAME_RELATED_P (insn) = 1;
23337 }
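/* A sketch of what the code above emits ("hfp" stands for the Thumb hard
   frame pointer; the offset is illustrative):

	;; amount < 1024
	add	hfp, sp, #amount

	;; larger amounts, Thumb-2 (Thumb-1 uses add hfp, hfp, sp)
	mov	hfp, #amount
	add	hfp, sp, hfp

   with, in the second case, a REG_FRAME_RELATED_EXPR note recording
   hfp = sp + amount for the unwinder.  */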
23338
23339 struct scratch_reg {
23340 rtx reg;
23341 bool saved;
23342 };
23343
23344 /* Return a short-lived scratch register for use as a 2nd scratch register on
23345 function entry after the registers are saved in the prologue. This register
23346 must be released by means of release_scratch_register_on_entry. IP is not
23347 considered since it is always used as the 1st scratch register if available.
23348
23349 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23350 mask of live registers. */
23351
23352 static void
23353 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23354 unsigned long live_regs)
23355 {
23356 int regno = -1;
23357
23358 sr->saved = false;
23359
23360 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23361 regno = LR_REGNUM;
23362 else
23363 {
23364 unsigned int i;
23365
23366 for (i = 4; i < 11; i++)
23367 if (regno1 != i && (live_regs & (1 << i)) != 0)
23368 {
23369 regno = i;
23370 break;
23371 }
23372
23373 if (regno < 0)
23374 {
23375 /* If IP is used as the 1st scratch register for a nested function,
23376 then either r3 wasn't available or is used to preserve IP. */
23377 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23378 regno1 = 3;
23379 regno = (regno1 == 3 ? 2 : 3);
23380 sr->saved
23381 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23382 regno);
23383 }
23384 }
23385
23386 sr->reg = gen_rtx_REG (SImode, regno);
23387 if (sr->saved)
23388 {
23389 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23390 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23391 rtx x = gen_rtx_SET (stack_pointer_rtx,
23392 plus_constant (Pmode, stack_pointer_rtx, -4));
23393 RTX_FRAME_RELATED_P (insn) = 1;
23394 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23395 }
23396 }
23397
23398 /* Release a scratch register obtained from the preceding function. */
23399
23400 static void
23401 release_scratch_register_on_entry (struct scratch_reg *sr)
23402 {
23403 if (sr->saved)
23404 {
23405 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23406 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23407 rtx x = gen_rtx_SET (stack_pointer_rtx,
23408 plus_constant (Pmode, stack_pointer_rtx, 4));
23409 RTX_FRAME_RELATED_P (insn) = 1;
23410 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23411 }
23412 }
23413
23414 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23415
23416 #if PROBE_INTERVAL > 4096
23417 #error Cannot use indexed addressing mode for stack probing
23418 #endif
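/* For example, with the default STACK_CHECK_PROBE_INTERVAL_EXP of 12
   (assuming the target does not override it), PROBE_INTERVAL is 4096
   bytes, which still fits the 12-bit offset range checked for below
   (the "rem > 4095" tests).  */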
23419
23420 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23421 inclusive. These are offsets from the current stack pointer. REGNO1
23422 is the index number of the 1st scratch register and LIVE_REGS is the
23423 mask of live registers. */
23424
23425 static void
23426 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23427 unsigned int regno1, unsigned long live_regs)
23428 {
23429 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23430
23431 /* See if we have a constant small number of probes to generate. If so,
23432 that's the easy case. */
23433 if (size <= PROBE_INTERVAL)
23434 {
23435 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23436 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23437 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23438 }
23439
23440 /* The run-time loop is made up of 10 insns in the generic case while the
23441 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
23442 else if (size <= 5 * PROBE_INTERVAL)
23443 {
23444 HOST_WIDE_INT i, rem;
23445
23446 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23447 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23448 emit_stack_probe (reg1);
23449
23450 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23451 it exceeds SIZE. If only two probes are needed, this will not
23452 generate any code. Then probe at FIRST + SIZE. */
23453 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23454 {
23455 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23456 emit_stack_probe (reg1);
23457 }
23458
23459 rem = size - (i - PROBE_INTERVAL);
23460 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23461 {
23462 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23463 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23464 }
23465 else
23466 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23467 }
23468
23469 /* Otherwise, do the same as above, but in a loop. Note that we must be
23470 extra careful with variables wrapping around because we might be at
23471 the very top (or the very bottom) of the address space and we have
23472 to be able to handle this case properly; in particular, we use an
23473 equality test for the loop condition. */
23474 else
23475 {
23476 HOST_WIDE_INT rounded_size;
23477 struct scratch_reg sr;
23478
23479 get_scratch_register_on_entry (&sr, regno1, live_regs);
23480
23481 emit_move_insn (reg1, GEN_INT (first));
23482
23483
23484 /* Step 1: round SIZE to the previous multiple of the interval. */
23485
23486 rounded_size = size & -PROBE_INTERVAL;
23487 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23488
23489
23490 /* Step 2: compute initial and final value of the loop counter. */
23491
23492 /* TEST_ADDR = SP + FIRST. */
23493 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23494
23495 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23496 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23497
23498
23499 /* Step 3: the loop
23500
23501 do
23502 {
23503 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23504 probe at TEST_ADDR
23505 }
23506 while (TEST_ADDR != LAST_ADDR)
23507
23508 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23509 until it is equal to ROUNDED_SIZE. */
23510
23511 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23512
23513
23514 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23515 that SIZE is equal to ROUNDED_SIZE. */
23516
23517 if (size != rounded_size)
23518 {
23519 HOST_WIDE_INT rem = size - rounded_size;
23520
23521 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23522 {
23523 emit_set_insn (sr.reg,
23524 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23525 emit_stack_probe (plus_constant (Pmode, sr.reg,
23526 PROBE_INTERVAL - rem));
23527 }
23528 else
23529 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23530 }
23531
23532 release_scratch_register_on_entry (&sr);
23533 }
23534
23535 /* Make sure nothing is scheduled before we are done. */
23536 emit_insn (gen_blockage ());
23537 }
23538
23539 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23540 absolute addresses. */
23541
23542 const char *
23543 output_probe_stack_range (rtx reg1, rtx reg2)
23544 {
23545 static int labelno = 0;
23546 char loop_lab[32];
23547 rtx xops[2];
23548
23549 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23550
23551 /* Loop. */
23552 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23553
23554 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23555 xops[0] = reg1;
23556 xops[1] = GEN_INT (PROBE_INTERVAL);
23557 output_asm_insn ("sub\t%0, %0, %1", xops);
23558
23559 /* Probe at TEST_ADDR. */
23560 output_asm_insn ("str\tr0, [%0, #0]", xops);
23561
23562 /* Test if TEST_ADDR == LAST_ADDR. */
23563 xops[1] = reg2;
23564 output_asm_insn ("cmp\t%0, %1", xops);
23565
23566 /* Branch. */
23567 fputs ("\tbne\t", asm_out_file);
23568 assemble_name_raw (asm_out_file, loop_lab);
23569 fputc ('\n', asm_out_file);
23570
23571 return "";
23572 }
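/* As an illustration, with a 4096-byte PROBE_INTERVAL and the scratch
   registers r4 (TEST_ADDR) and r5 (LAST_ADDR), the template above expands
   to something like:

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0

   (register names and the label number are illustrative).  */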
23573
23574 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23575 function. */
23576 void
23577 arm_expand_prologue (void)
23578 {
23579 rtx amount;
23580 rtx insn;
23581 rtx ip_rtx;
23582 unsigned long live_regs_mask;
23583 unsigned long func_type;
23584 int fp_offset = 0;
23585 int saved_pretend_args = 0;
23586 int saved_regs = 0;
23587 unsigned HOST_WIDE_INT args_to_push;
23588 HOST_WIDE_INT size;
23589 arm_stack_offsets *offsets;
23590 bool clobber_ip;
23591
23592 func_type = arm_current_func_type ();
23593
23594 /* Naked functions don't have prologues. */
23595 if (IS_NAKED (func_type))
23596 {
23597 if (flag_stack_usage_info)
23598 current_function_static_stack_size = 0;
23599 return;
23600 }
23601
23602 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23603 args_to_push = crtl->args.pretend_args_size;
23604
23605 /* Compute which registers we will have to save onto the stack. */
23606 offsets = arm_get_frame_offsets ();
23607 live_regs_mask = offsets->saved_regs_mask;
23608
23609 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23610
23611 if (IS_STACKALIGN (func_type))
23612 {
23613 rtx r0, r1;
23614
23615 /* Handle a word-aligned stack pointer. We generate the following:
23616
23617 mov r0, sp
23618 bic r1, r0, #7
23619 mov sp, r1
23620 <save and restore r0 in normal prologue/epilogue>
23621 mov sp, r0
23622 bx lr
23623
23624 The unwinder doesn't need to know about the stack realignment.
23625 Just tell it we saved SP in r0. */
23626 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23627
23628 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23629 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23630
23631 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23632 RTX_FRAME_RELATED_P (insn) = 1;
23633 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23634
23635 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23636
23637 /* ??? The CFA changes here, which may cause GDB to conclude that it
23638 has entered a different function. That said, the unwind info is
23639 correct, individually, before and after this instruction because
23640 we've described the save of SP, which will override the default
23641 handling of SP as restoring from the CFA. */
23642 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23643 }
23644
23645 /* Let's compute the static_chain_stack_bytes required and store it. Right
23646 now the value must be -1 as stored by arm_init_machine_status (). */
23647 cfun->machine->static_chain_stack_bytes
23648 = arm_compute_static_chain_stack_bytes ();
23649
23650 /* The static chain register is the same as the IP register. If it is
23651 clobbered when creating the frame, we need to save and restore it. */
23652 clobber_ip = (IS_NESTED (func_type)
23653 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23654 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23655 || flag_stack_clash_protection)
23656 && !df_regs_ever_live_p (LR_REGNUM)
23657 && arm_r3_live_at_start_p ()))
23658 || arm_current_function_pac_enabled_p ()));
23659
23660 /* Find somewhere to store IP whilst the frame is being created.
23661 We try the following places in order:
23662
23663 1. The last argument register r3 if it is available.
23664 2. A slot on the stack above the frame if there are no
23665 arguments to push onto the stack.
23666 3. Register r3 again, after pushing the argument registers
23667 onto the stack, if this is a varargs function.
23668 4. The last slot on the stack created for the arguments to
23669 push, if this isn't a varargs function.
23670
23671 Note - we only need to tell the dwarf2 backend about the SP
23672 adjustment in the second variant; the static chain register
23673 doesn't need to be unwound, as it doesn't contain a value
23674 inherited from the caller. */
23675 if (clobber_ip)
23676 {
23677 if (!arm_r3_live_at_start_p ())
23678 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23679 else if (args_to_push == 0)
23680 {
23681 rtx addr, dwarf;
23682
23683 saved_regs += 4;
23684
23685 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23686 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23687 fp_offset = 4;
23688
23689 /* Just tell the dwarf backend that we adjusted SP. */
23690 dwarf = gen_rtx_SET (stack_pointer_rtx,
23691 plus_constant (Pmode, stack_pointer_rtx,
23692 -fp_offset));
23693 RTX_FRAME_RELATED_P (insn) = 1;
23694 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23695 if (arm_current_function_pac_enabled_p ())
23696 cfun->machine->pacspval_needed = 1;
23697 }
23698 else
23699 {
23700 /* Store the args on the stack. */
23701 if (cfun->machine->uses_anonymous_args)
23702 {
23703 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23704 (0xf0 >> (args_to_push / 4)) & 0xf);
23705 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23706 saved_pretend_args = 1;
23707 }
23708 else
23709 {
23710 rtx addr, dwarf;
23711
23712 if (args_to_push == 4)
23713 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23714 else
23715 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23716 plus_constant (Pmode,
23717 stack_pointer_rtx,
23718 -args_to_push));
23719
23720 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23721
23722 /* Just tell the dwarf backend that we adjusted SP. */
23723 dwarf = gen_rtx_SET (stack_pointer_rtx,
23724 plus_constant (Pmode, stack_pointer_rtx,
23725 -args_to_push));
23726 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23727 }
23728
23729 RTX_FRAME_RELATED_P (insn) = 1;
23730 fp_offset = args_to_push;
23731 args_to_push = 0;
23732 if (arm_current_function_pac_enabled_p ())
23733 cfun->machine->pacspval_needed = 1;
23734 }
23735 }
23736
23737 if (arm_current_function_pac_enabled_p ())
23738 {
23739 /* If IP was clobbered we only emit a PAC instruction as the BTI
23740 one will be added before the push of the clobbered IP (if
23741 necessary) by the bti pass. */
23742 if (aarch_bti_enabled () && !clobber_ip)
23743 insn = emit_insn (gen_pacbti_nop ());
23744 else
23745 insn = emit_insn (gen_pac_nop ());
23746
23747 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23748 RTX_FRAME_RELATED_P (insn) = 1;
23749 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23750 }
23751
23752 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23753 {
23754 if (IS_INTERRUPT (func_type))
23755 {
23756 /* Interrupt functions must not corrupt any registers.
23757 Creating a frame pointer, however, corrupts the IP
23758 register, so we must push it first. */
23759 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23760
23761 /* Do not set RTX_FRAME_RELATED_P on this insn.
23762 The dwarf stack unwinding code only wants to see one
23763 stack decrement per function, and this is not it. If
23764 this instruction is labeled as being part of the frame
23765 creation sequence then dwarf2out_frame_debug_expr will
23766 die when it encounters the assignment of IP to FP
23767 later on, since the use of SP here establishes SP as
23768 the CFA register and not IP.
23769
23770 Anyway this instruction is not really part of the stack
23771 frame creation although it is part of the prologue. */
23772 }
23773
23774 insn = emit_set_insn (ip_rtx,
23775 plus_constant (Pmode, stack_pointer_rtx,
23776 fp_offset));
23777 RTX_FRAME_RELATED_P (insn) = 1;
23778 }
23779
23780 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23781 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23782 {
23783 saved_regs += 4;
23784 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23785 GEN_INT (FPCXTNS_ENUM)));
23786 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23787 plus_constant (Pmode, stack_pointer_rtx, -4));
23788 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23789 RTX_FRAME_RELATED_P (insn) = 1;
23790 }
23791
23792 if (args_to_push)
23793 {
23794 /* Push the argument registers, or reserve space for them. */
23795 if (cfun->machine->uses_anonymous_args)
23796 insn = emit_multi_reg_push
23797 ((0xf0 >> (args_to_push / 4)) & 0xf,
23798 (0xf0 >> (args_to_push / 4)) & 0xf);
23799 else
23800 insn = emit_insn
23801 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23802 GEN_INT (- args_to_push)));
23803 RTX_FRAME_RELATED_P (insn) = 1;
23804 }
23805
23806 /* If this is an interrupt service routine, and the link register
23807 is going to be pushed, and we're not generating an extra
23808 push of IP (needed when a frame is needed and the frame layout is APCS),
23809 subtracting four from LR now will mean that the function return
23810 can be done with a single instruction. */
23811 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23812 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23813 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23814 && TARGET_ARM)
23815 {
23816 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23817
23818 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23819 }
23820
23821 if (live_regs_mask)
23822 {
23823 unsigned long dwarf_regs_mask = live_regs_mask;
23824
23825 saved_regs += bit_count (live_regs_mask) * 4;
23826 if (optimize_size && !frame_pointer_needed
23827 && saved_regs == offsets->saved_regs - offsets->saved_args)
23828 {
23829 /* If no coprocessor registers are being pushed and we don't have
23830 to worry about a frame pointer then push extra registers to
23831 create the stack frame. This is done in a way that does not
23832 alter the frame layout, so is independent of the epilogue. */
23833 int n;
23834 int frame;
23835 n = 0;
23836 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23837 n++;
23838 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23839 if (frame && n * 4 >= frame)
23840 {
23841 n = frame / 4;
23842 live_regs_mask |= (1 << n) - 1;
23843 saved_regs += frame;
23844 }
23845 }
23846
23847 if (TARGET_LDRD
23848 && current_tune->prefer_ldrd_strd
23849 && !optimize_function_for_size_p (cfun))
23850 {
23851 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23852 if (TARGET_THUMB2)
23853 thumb2_emit_strd_push (live_regs_mask);
23854 else if (TARGET_ARM
23855 && !TARGET_APCS_FRAME
23856 && !IS_INTERRUPT (func_type))
23857 arm_emit_strd_push (live_regs_mask);
23858 else
23859 {
23860 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23861 RTX_FRAME_RELATED_P (insn) = 1;
23862 }
23863 }
23864 else
23865 {
23866 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23867 RTX_FRAME_RELATED_P (insn) = 1;
23868 }
23869 }
23870
23871 if (! IS_VOLATILE (func_type))
23872 saved_regs += arm_save_coproc_regs ();
23873
23874 if (frame_pointer_needed && TARGET_ARM)
23875 {
23876 /* Create the new frame pointer. */
23877 if (TARGET_APCS_FRAME)
23878 {
23879 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23880 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23881 RTX_FRAME_RELATED_P (insn) = 1;
23882 }
23883 else
23884 {
23885 insn = GEN_INT (saved_regs - (4 + fp_offset));
23886 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23887 stack_pointer_rtx, insn));
23888 RTX_FRAME_RELATED_P (insn) = 1;
23889 }
23890 }
23891
23892 size = offsets->outgoing_args - offsets->saved_args;
23893 if (flag_stack_usage_info)
23894 current_function_static_stack_size = size;
23895
23896 /* If this isn't an interrupt service routine and we have a frame, then do
23897 stack checking. We use IP as the first scratch register, except for the
23898 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23899 if (!IS_INTERRUPT (func_type)
23900 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23901 || flag_stack_clash_protection))
23902 {
23903 unsigned int regno;
23904
23905 if (!IS_NESTED (func_type) || clobber_ip)
23906 regno = IP_REGNUM;
23907 else if (df_regs_ever_live_p (LR_REGNUM))
23908 regno = LR_REGNUM;
23909 else
23910 regno = 3;
23911
23912 if (crtl->is_leaf && !cfun->calls_alloca)
23913 {
23914 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23915 arm_emit_probe_stack_range (get_stack_check_protect (),
23916 size - get_stack_check_protect (),
23917 regno, live_regs_mask);
23918 }
23919 else if (size > 0)
23920 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23921 regno, live_regs_mask);
23922 }
23923
23924 /* Recover the static chain register. */
23925 if (clobber_ip)
23926 {
23927 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23928 insn = gen_rtx_REG (SImode, 3);
23929 else
23930 {
23931 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23932 insn = gen_frame_mem (SImode, insn);
23933 }
23934 emit_set_insn (ip_rtx, insn);
23935 emit_insn (gen_force_register_use (ip_rtx));
23936 }
23937
23938 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23939 {
23940 /* This add can produce multiple insns for a large constant, so we
23941 need to get tricky. */
23942 rtx_insn *last = get_last_insn ();
23943
23944 amount = GEN_INT (offsets->saved_args + saved_regs
23945 - offsets->outgoing_args);
23946
23947 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23948 amount));
23949 do
23950 {
23951 last = last ? NEXT_INSN (last) : get_insns ();
23952 RTX_FRAME_RELATED_P (last) = 1;
23953 }
23954 while (last != insn);
23955
23956 /* If the frame pointer is needed, emit a special barrier that
23957 will prevent the scheduler from moving stores to the frame
23958 before the stack adjustment. */
23959 if (frame_pointer_needed)
23960 emit_insn (gen_stack_tie (stack_pointer_rtx,
23961 hard_frame_pointer_rtx));
23962 }
23963
23964
23965 if (frame_pointer_needed && TARGET_THUMB2)
23966 thumb_set_frame_pointer (offsets);
23967
23968 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23969 {
23970 unsigned long mask;
23971
23972 mask = live_regs_mask;
23973 mask &= THUMB2_WORK_REGS;
23974 if (!IS_NESTED (func_type))
23975 mask |= (1 << IP_REGNUM);
23976 arm_load_pic_register (mask, NULL_RTX);
23977 }
23978
23979 /* If we are profiling, make sure no instructions are scheduled before
23980 the call to mcount. Similarly if the user has requested no
23981 scheduling in the prolog. Similarly if we want non-call exceptions
23982 using the EABI unwinder, to prevent faulting instructions from being
23983 swapped with a stack adjustment. */
23984 if (crtl->profile || !TARGET_SCHED_PROLOG
23985 || (arm_except_unwind_info (&global_options) == UI_TARGET
23986 && cfun->can_throw_non_call_exceptions))
23987 emit_insn (gen_blockage ());
23988
23989 /* If the link register is being kept alive, with the return address in it,
23990 then make sure that it does not get reused by the ce2 pass. */
23991 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23992 cfun->machine->lr_save_eliminated = 1;
23993 }
23994 \f
23995 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23996 static void
23997 arm_print_condition (FILE *stream)
23998 {
23999 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
24000 {
24001 /* Branch conversion is not implemented for Thumb-2. */
24002 if (TARGET_THUMB)
24003 {
24004 output_operand_lossage ("predicated Thumb instruction");
24005 return;
24006 }
24007 if (current_insn_predicate != NULL)
24008 {
24009 output_operand_lossage
24010 ("predicated instruction in conditional sequence");
24011 return;
24012 }
24013
24014 fputs (arm_condition_codes[arm_current_cc], stream);
24015 }
24016 else if (current_insn_predicate)
24017 {
24018 enum arm_cond_code code;
24019
24020 if (TARGET_THUMB1)
24021 {
24022 output_operand_lossage ("predicated Thumb instruction");
24023 return;
24024 }
24025
24026 code = get_arm_condition_code (current_insn_predicate);
24027 fputs (arm_condition_codes[code], stream);
24028 }
24029 }
24030
24031
24032 /* Globally reserved letters: acln
24033 Punctuation letters currently used: @_|?().!#
24034 Lower case letters currently used: bcdefhimpqtvwxyz
24035 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24036 Letters previously used, but now deprecated/obsolete: sWXYZ.
24037
24038 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24039
24040 If CODE is 'd', then the X is a condition operand and the instruction
24041 should only be executed if the condition is true.
24042 If CODE is 'D', then the X is a condition operand and the instruction
24043 should only be executed if the condition is false: however, if the mode
24044 of the comparison is CCFPEmode, then always execute the instruction -- we
24045 do this because in these circumstances !GE does not necessarily imply LT;
24046 in these cases the instruction pattern will take care to make sure that
24047 an instruction containing %d will follow, thereby undoing the effects of
24048 doing this instruction unconditionally.
24049 If CODE is 'N' then X is a floating point operand that must be negated
24050 before output.
24051 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24052 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24053 If CODE is 'V', then the operand must be a CONST_INT representing
24054 the bits to preserve in the modified register (Rd) of a BFI or BFC
24055 instruction: print out both the width and lsb (shift) fields. */
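/* A few worked examples of the codes handled below (operand values are
   purely illustrative): if operand 0 is the CONST_INT 5, "%B0" prints -6
   (the bitwise inverse, sign-extended) and "%x0" prints #0x5; if operand 0
   is the CONST_INT 8, "%b0" prints #3 (its log2).  */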
24056 static void
24057 arm_print_operand (FILE *stream, rtx x, int code)
24058 {
24059 switch (code)
24060 {
24061 case '@':
24062 fputs (ASM_COMMENT_START, stream);
24063 return;
24064
24065 case '_':
24066 fputs (user_label_prefix, stream);
24067 return;
24068
24069 case '|':
24070 fputs (REGISTER_PREFIX, stream);
24071 return;
24072
24073 case '?':
24074 arm_print_condition (stream);
24075 return;
24076
24077 case '.':
24078 /* The current condition code for a condition code setting instruction.
24079 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24080 fputc('s', stream);
24081 arm_print_condition (stream);
24082 return;
24083
24084 case '!':
24085 /* If the instruction is conditionally executed then print
24086 the current condition code, otherwise print 's'. */
24087 gcc_assert (TARGET_THUMB2);
24088 if (current_insn_predicate)
24089 arm_print_condition (stream);
24090 else
24091 fputc('s', stream);
24092 break;
24093
24094 /* %# is a "break" sequence. It doesn't output anything, but is used to
24095 separate e.g. operand numbers from following text, if that text consists
24096 of further digits which we don't want to be part of the operand
24097 number. */
24098 case '#':
24099 return;
24100
24101 case 'N':
24102 {
24103 REAL_VALUE_TYPE r;
24104 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24105 fprintf (stream, "%s", fp_const_from_val (&r));
24106 }
24107 return;
24108
24109 /* An integer or symbol address without a preceding # sign. */
24110 case 'c':
24111 switch (GET_CODE (x))
24112 {
24113 case CONST_INT:
24114 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24115 break;
24116
24117 case SYMBOL_REF:
24118 output_addr_const (stream, x);
24119 break;
24120
24121 case CONST:
24122 if (GET_CODE (XEXP (x, 0)) == PLUS
24123 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24124 {
24125 output_addr_const (stream, x);
24126 break;
24127 }
24128 /* Fall through. */
24129
24130 default:
24131 output_operand_lossage ("Unsupported operand for code '%c'", code);
24132 }
24133 return;
24134
24135 /* An integer that we want to print in HEX. */
24136 case 'x':
24137 switch (GET_CODE (x))
24138 {
24139 case CONST_INT:
24140 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24141 break;
24142
24143 default:
24144 output_operand_lossage ("Unsupported operand for code '%c'", code);
24145 }
24146 return;
24147
24148 case 'B':
24149 if (CONST_INT_P (x))
24150 {
24151 HOST_WIDE_INT val;
24152 val = ARM_SIGN_EXTEND (~INTVAL (x));
24153 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24154 }
24155 else
24156 {
24157 putc ('~', stream);
24158 output_addr_const (stream, x);
24159 }
24160 return;
24161
24162 case 'b':
24163 /* Print the log2 of a CONST_INT. */
24164 {
24165 HOST_WIDE_INT val;
24166
24167 if (!CONST_INT_P (x)
24168 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24169 output_operand_lossage ("Unsupported operand for code '%c'", code);
24170 else
24171 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24172 }
24173 return;
24174
24175 case 'L':
24176 /* The low 16 bits of an immediate constant. */
24177 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24178 return;
24179
24180 case 'i':
24181 fprintf (stream, "%s", arithmetic_instr (x, 1));
24182 return;
24183
24184 case 'I':
24185 fprintf (stream, "%s", arithmetic_instr (x, 0));
24186 return;
24187
24188 case 'S':
24189 {
24190 HOST_WIDE_INT val;
24191 const char *shift;
24192
24193 shift = shift_op (x, &val);
24194
24195 if (shift)
24196 {
24197 fprintf (stream, ", %s ", shift);
24198 if (val == -1)
24199 arm_print_operand (stream, XEXP (x, 1), 0);
24200 else
24201 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24202 }
24203 }
24204 return;
24205
24206 /* An explanation of the 'Q', 'R' and 'H' register operands:
24207
24208 In a pair of registers containing a DI or DF value the 'Q'
24209 operand returns the register number of the register containing
24210 the least significant part of the value. The 'R' operand returns
24211 the register number of the register containing the most
24212 significant part of the value.
24213
24214 The 'H' operand returns the higher of the two register numbers.
24215 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24216 same as the 'Q' operand, since the most significant part of the
24217 value is held in the lower number register. The reverse is true
24218 on systems where WORDS_BIG_ENDIAN is false.
24219
24220 The purpose of these operands is to distinguish between cases
24221 where the endian-ness of the values is important (for example
24222 when they are added together), and cases where the endian-ness
24223 is irrelevant, but the order of register operations is important.
24224 For example when loading a value from memory into a register
24225 pair, the endian-ness does not matter. Provided that the value
24226 from the lower memory address is put into the lower numbered
24227 register, and the value from the higher address is put into the
24228 higher numbered register, the load will work regardless of whether
24229 the value being loaded is big-wordian or little-wordian. The
24230 order of the two register loads can matter however, if the address
24231 of the memory location is actually held in one of the registers
24232 being overwritten by the load.
24233
24234 The 'Q' and 'R' constraints are also available for 64-bit
24235 constants. */
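    /* For example, for a DImode value held in the register pair {r0, r1}
       on a target where WORDS_BIG_ENDIAN is false, 'Q' prints r0 while 'R'
       and 'H' both print r1; when WORDS_BIG_ENDIAN is true, 'Q' and 'R'
       swap roles and 'H' still prints r1.  */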
24236 case 'Q':
24237 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24238 {
24239 rtx part = gen_lowpart (SImode, x);
24240 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24241 return;
24242 }
24243
24244 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24245 {
24246 output_operand_lossage ("invalid operand for code '%c'", code);
24247 return;
24248 }
24249
24250 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24251 return;
24252
24253 case 'R':
24254 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24255 {
24256 machine_mode mode = GET_MODE (x);
24257 rtx part;
24258
24259 if (mode == VOIDmode)
24260 mode = DImode;
24261 part = gen_highpart_mode (SImode, mode, x);
24262 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24263 return;
24264 }
24265
24266 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24267 {
24268 output_operand_lossage ("invalid operand for code '%c'", code);
24269 return;
24270 }
24271
24272 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24273 return;
24274
24275 case 'H':
24276 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24277 {
24278 output_operand_lossage ("invalid operand for code '%c'", code);
24279 return;
24280 }
24281
24282 asm_fprintf (stream, "%r", REGNO (x) + 1);
24283 return;
24284
24285 case 'J':
24286 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24287 {
24288 output_operand_lossage ("invalid operand for code '%c'", code);
24289 return;
24290 }
24291
24292 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24293 return;
24294
24295 case 'K':
24296 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24297 {
24298 output_operand_lossage ("invalid operand for code '%c'", code);
24299 return;
24300 }
24301
24302 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24303 return;
24304
24305 case 'm':
24306 asm_fprintf (stream, "%r",
24307 REG_P (XEXP (x, 0))
24308 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24309 return;
24310
24311 case 'M':
24312 asm_fprintf (stream, "{%r-%r}",
24313 REGNO (x),
24314 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24315 return;
24316
24317 /* Like 'M', but writing doubleword vector registers, for use by Neon
24318 insns. */
24319 case 'h':
24320 {
24321 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24322 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24323 if (numregs == 1)
24324 asm_fprintf (stream, "{d%d}", regno);
24325 else
24326 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24327 }
24328 return;
24329
24330 case 'd':
24331 /* CONST_TRUE_RTX means always -- that's the default. */
24332 if (x == const_true_rtx)
24333 return;
24334
24335 if (!COMPARISON_P (x))
24336 {
24337 output_operand_lossage ("invalid operand for code '%c'", code);
24338 return;
24339 }
24340
24341 fputs (arm_condition_codes[get_arm_condition_code (x)],
24342 stream);
24343 return;
24344
24345 case 'D':
24346 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24347 want to do that. */
24348 if (x == const_true_rtx)
24349 {
24350 output_operand_lossage ("instruction never executed");
24351 return;
24352 }
24353 if (!COMPARISON_P (x))
24354 {
24355 output_operand_lossage ("invalid operand for code '%c'", code);
24356 return;
24357 }
24358
24359 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24360 (get_arm_condition_code (x))],
24361 stream);
24362 return;
24363
24364 case 'V':
24365 {
24366 /* Output the LSB (shift) and width for a bitmask instruction
24367 based on a literal mask. The LSB is printed first,
24368 followed by the width.
24369
24370 Eg. For 0b1...1110001, the result is #1, #3. */
24371 if (!CONST_INT_P (x))
24372 {
24373 output_operand_lossage ("invalid operand for code '%c'", code);
24374 return;
24375 }
24376
24377 unsigned HOST_WIDE_INT val
24378 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24379 int lsb = exact_log2 (val & -val);
24380 asm_fprintf (stream, "#%d, #%d", lsb,
24381 (exact_log2 (val + (val & -val)) - lsb));
24382 }
24383 return;
24384
24385 case 's':
24386 case 'W':
24387 case 'X':
24388 case 'Y':
24389 case 'Z':
24390 /* Former Maverick support, removed after GCC-4.7. */
24391 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24392 return;
24393
24394 case 'U':
24395 if (!REG_P (x)
24396 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24397 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24398 /* Bad value for wCG register number. */
24399 {
24400 output_operand_lossage ("invalid operand for code '%c'", code);
24401 return;
24402 }
24403
24404 else
24405 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24406 return;
24407
24408 /* Print an iWMMXt control register name. */
24409 case 'w':
24410 if (!CONST_INT_P (x)
24411 || INTVAL (x) < 0
24412 || INTVAL (x) >= 16)
24413 /* Bad value for wC register number. */
24414 {
24415 output_operand_lossage ("invalid operand for code '%c'", code);
24416 return;
24417 }
24418
24419 else
24420 {
24421 static const char * wc_reg_names [16] =
24422 {
24423 "wCID", "wCon", "wCSSF", "wCASF",
24424 "wC4", "wC5", "wC6", "wC7",
24425 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24426 "wC12", "wC13", "wC14", "wC15"
24427 };
24428
24429 fputs (wc_reg_names [INTVAL (x)], stream);
24430 }
24431 return;
24432
24433 /* Print the high single-precision register of a VFP double-precision
24434 register. */
24435 case 'p':
24436 {
24437 machine_mode mode = GET_MODE (x);
24438 int regno;
24439
24440 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24441 {
24442 output_operand_lossage ("invalid operand for code '%c'", code);
24443 return;
24444 }
24445
24446 regno = REGNO (x);
24447 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24448 {
24449 output_operand_lossage ("invalid operand for code '%c'", code);
24450 return;
24451 }
24452
24453 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24454 }
24455 return;
24456
24457 /* Print a VFP/Neon double precision or quad precision register name. */
24458 case 'P':
24459 case 'q':
24460 {
24461 machine_mode mode = GET_MODE (x);
24462 int is_quad = (code == 'q');
24463 int regno;
24464
24465 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24466 {
24467 output_operand_lossage ("invalid operand for code '%c'", code);
24468 return;
24469 }
24470
24471 if (!REG_P (x)
24472 || !IS_VFP_REGNUM (REGNO (x)))
24473 {
24474 output_operand_lossage ("invalid operand for code '%c'", code);
24475 return;
24476 }
24477
24478 regno = REGNO (x);
24479 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24480 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24481 {
24482 output_operand_lossage ("invalid operand for code '%c'", code);
24483 return;
24484 }
24485
24486 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24487 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24488 }
24489 return;
24490
24491 /* These two codes print the low/high doubleword register of a Neon quad
24492 register, respectively. For pair-structure types, can also print
24493 low/high quadword registers. */
24494 case 'e':
24495 case 'f':
24496 {
24497 machine_mode mode = GET_MODE (x);
24498 int regno;
24499
24500 if ((GET_MODE_SIZE (mode) != 16
24501 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24502 {
24503 output_operand_lossage ("invalid operand for code '%c'", code);
24504 return;
24505 }
24506
24507 regno = REGNO (x);
24508 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24509 {
24510 output_operand_lossage ("invalid operand for code '%c'", code);
24511 return;
24512 }
24513
24514 if (GET_MODE_SIZE (mode) == 16)
24515 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24516 + (code == 'f' ? 1 : 0));
24517 else
24518 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24519 + (code == 'f' ? 1 : 0));
24520 }
24521 return;
24522
24523 /* Print a VFPv3 floating-point constant, represented as an integer
24524 index. */
24525 case 'G':
24526 {
24527 int index = vfp3_const_double_index (x);
24528 gcc_assert (index != -1);
24529 fprintf (stream, "%d", index);
24530 }
24531 return;
24532
24533 /* Print bits representing opcode features for Neon.
24534
24535 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24536 and polynomials as unsigned.
24537
24538 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24539
24540 Bit 2 is 1 for rounding functions, 0 otherwise. */
24541
24542 /* Identify the type as 's', 'u', 'p' or 'f'. */
24543 case 'T':
24544 {
24545 HOST_WIDE_INT bits = INTVAL (x);
24546 fputc ("uspf"[bits & 3], stream);
24547 }
24548 return;
24549
24550 /* Likewise, but signed and unsigned integers are both 'i'. */
24551 case 'F':
24552 {
24553 HOST_WIDE_INT bits = INTVAL (x);
24554 fputc ("iipf"[bits & 3], stream);
24555 }
24556 return;
24557
24558 /* As for 'T', but emit 'u' instead of 'p'. */
24559 case 't':
24560 {
24561 HOST_WIDE_INT bits = INTVAL (x);
24562 fputc ("usuf"[bits & 3], stream);
24563 }
24564 return;
24565
24566 /* Bit 2: rounding (vs none). */
24567 case 'O':
24568 {
24569 HOST_WIDE_INT bits = INTVAL (x);
24570 fputs ((bits & 4) != 0 ? "r" : "", stream);
24571 }
24572 return;
24573
24574 /* Memory operand for vld1/vst1 instruction. */
24575 case 'A':
24576 {
24577 rtx addr;
24578 bool postinc = FALSE;
24579 rtx postinc_reg = NULL;
24580 unsigned align, memsize, align_bits;
24581
24582 gcc_assert (MEM_P (x));
24583 addr = XEXP (x, 0);
24584 if (GET_CODE (addr) == POST_INC)
24585 {
24586 postinc = 1;
24587 addr = XEXP (addr, 0);
24588 }
24589 if (GET_CODE (addr) == POST_MODIFY)
24590 {
24591 postinc_reg = XEXP (XEXP (addr, 1), 1);
24592 addr = XEXP (addr, 0);
24593 }
24594 asm_fprintf (stream, "[%r", REGNO (addr));
24595
24596 /* We know the alignment of this access, so we can emit a hint in the
24597 instruction (for some alignments) as an aid to the memory subsystem
24598 of the target. */
24599 align = MEM_ALIGN (x) >> 3;
24600 memsize = MEM_SIZE (x);
24601
24602 /* Only certain alignment specifiers are supported by the hardware. */
24603 if (memsize == 32 && (align % 32) == 0)
24604 align_bits = 256;
24605 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24606 align_bits = 128;
24607 else if (memsize >= 8 && (align % 8) == 0)
24608 align_bits = 64;
24609 else
24610 align_bits = 0;
24611
24612 if (align_bits != 0)
24613 asm_fprintf (stream, ":%d", align_bits);
24614
24615 asm_fprintf (stream, "]");
24616
24617 if (postinc)
24618 fputs("!", stream);
24619 if (postinc_reg)
24620 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24621 }
24622 return;
24623
24624 /* To print a memory operand with an "Ux" or "Uj" constraint. Depending on
24625 the rtx_code of the address, the output takes one of these forms:
24626 1. [Rn], #+/-<imm>
24627 2. [Rn, #+/-<imm>]!
24628 3. [Rn, #+/-<imm>]
24629 4. [Rn] (concrete examples are sketched just after this comment). */
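    /* For a 4-byte access (register names and offsets are purely
       illustrative): a POST_INC address prints as "[r0], #4", a PRE_DEC
       address as "[r0, #-4]!", a PLUS of r0 and the constant 8 as
       "[r0, #8]", and a plain register address as "[r0]".  */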
24630 case 'E':
24631 {
24632 rtx addr;
24633 rtx postinc_reg = NULL;
24634 unsigned inc_val = 0;
24635 enum rtx_code code;
24636
24637 gcc_assert (MEM_P (x));
24638 addr = XEXP (x, 0);
24639 code = GET_CODE (addr);
24640 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24641 || code == PRE_DEC)
24642 {
24643 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24644 inc_val = GET_MODE_SIZE (GET_MODE (x));
24645 if (code == POST_INC || code == POST_DEC)
24646 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24647 ? "": "-", inc_val);
24648 else
24649 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24650 ? "": "-", inc_val);
24651 }
24652 else if (code == POST_MODIFY || code == PRE_MODIFY)
24653 {
24654 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24655 postinc_reg = XEXP (XEXP (addr, 1), 1);
24656 if (postinc_reg && CONST_INT_P (postinc_reg))
24657 {
24658 if (code == POST_MODIFY)
24659 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24660 else
24661 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24662 }
24663 }
24664 else if (code == PLUS)
24665 {
24666 rtx base = XEXP (addr, 0);
24667 rtx index = XEXP (addr, 1);
24668
24669 gcc_assert (REG_P (base) && CONST_INT_P (index));
24670
24671 HOST_WIDE_INT offset = INTVAL (index);
24672 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24673 }
24674 else
24675 {
24676 gcc_assert (REG_P (addr));
24677 asm_fprintf (stream, "[%r]",REGNO (addr));
24678 }
24679 }
24680 return;
24681
24682 case 'C':
24683 {
24684 rtx addr;
24685
24686 gcc_assert (MEM_P (x));
24687 addr = XEXP (x, 0);
24688 gcc_assert (REG_P (addr));
24689 asm_fprintf (stream, "[%r]", REGNO (addr));
24690 }
24691 return;
24692
24693 /* Translate an S register number into a D register number and element index. */
24694 case 'y':
24695 {
24696 machine_mode mode = GET_MODE (x);
24697 int regno;
24698
24699 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24700 {
24701 output_operand_lossage ("invalid operand for code '%c'", code);
24702 return;
24703 }
24704
24705 regno = REGNO (x);
24706 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24707 {
24708 output_operand_lossage ("invalid operand for code '%c'", code);
24709 return;
24710 }
24711
24712 regno = regno - FIRST_VFP_REGNUM;
24713 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24714 }
24715 return;
24716
24717 case 'v':
24718 gcc_assert (CONST_DOUBLE_P (x));
24719 int result;
24720 result = vfp3_const_double_for_fract_bits (x);
24721 if (result == 0)
24722 result = vfp3_const_double_for_bits (x);
24723 fprintf (stream, "#%d", result);
24724 return;
24725
24726 /* Register specifier for vld1.16/vst1.16. Translate the S register
24727 number into a D register number and element index. */
24728 case 'z':
24729 {
24730 machine_mode mode = GET_MODE (x);
24731 int regno;
24732
24733 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24734 {
24735 output_operand_lossage ("invalid operand for code '%c'", code);
24736 return;
24737 }
24738
24739 regno = REGNO (x);
24740 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24741 {
24742 output_operand_lossage ("invalid operand for code '%c'", code);
24743 return;
24744 }
24745
24746 regno = regno - FIRST_VFP_REGNUM;
24747 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24748 }
24749 return;
24750
24751 default:
24752 if (x == 0)
24753 {
24754 output_operand_lossage ("missing operand");
24755 return;
24756 }
24757
24758 switch (GET_CODE (x))
24759 {
24760 case REG:
24761 asm_fprintf (stream, "%r", REGNO (x));
24762 break;
24763
24764 case MEM:
24765 output_address (GET_MODE (x), XEXP (x, 0));
24766 break;
24767
24768 case CONST_DOUBLE:
24769 {
24770 char fpstr[20];
24771 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24772 sizeof (fpstr), 0, 1);
24773 fprintf (stream, "#%s", fpstr);
24774 }
24775 break;
24776
24777 default:
24778 gcc_assert (GET_CODE (x) != NEG);
24779 fputc ('#', stream);
24780 if (GET_CODE (x) == HIGH)
24781 {
24782 fputs (":lower16:", stream);
24783 x = XEXP (x, 0);
24784 }
24785
24786 output_addr_const (stream, x);
24787 break;
24788 }
24789 }
24790 }
24791 \f
24792 /* Target hook for printing a memory address. */
24793 static void
24794 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24795 {
24796 if (TARGET_32BIT)
24797 {
24798 int is_minus = GET_CODE (x) == MINUS;
24799
24800 if (REG_P (x))
24801 asm_fprintf (stream, "[%r]", REGNO (x));
24802 else if (GET_CODE (x) == PLUS || is_minus)
24803 {
24804 rtx base = XEXP (x, 0);
24805 rtx index = XEXP (x, 1);
24806 HOST_WIDE_INT offset = 0;
24807 if (!REG_P (base)
24808 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24809 {
24810 /* Ensure that BASE is a register. */
24811 /* (one of them must be). */
24812 /* Also ensure the SP is not used as an index register. */
24813 std::swap (base, index);
24814 }
24815 switch (GET_CODE (index))
24816 {
24817 case CONST_INT:
24818 offset = INTVAL (index);
24819 if (is_minus)
24820 offset = -offset;
24821 asm_fprintf (stream, "[%r, #%wd]",
24822 REGNO (base), offset);
24823 break;
24824
24825 case REG:
24826 asm_fprintf (stream, "[%r, %s%r]",
24827 REGNO (base), is_minus ? "-" : "",
24828 REGNO (index));
24829 break;
24830
24831 case MULT:
24832 case ASHIFTRT:
24833 case LSHIFTRT:
24834 case ASHIFT:
24835 case ROTATERT:
24836 {
24837 asm_fprintf (stream, "[%r, %s%r",
24838 REGNO (base), is_minus ? "-" : "",
24839 REGNO (XEXP (index, 0)));
24840 arm_print_operand (stream, index, 'S');
24841 fputs ("]", stream);
24842 break;
24843 }
24844
24845 default:
24846 gcc_unreachable ();
24847 }
24848 }
24849 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24850 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24851 {
24852 gcc_assert (REG_P (XEXP (x, 0)));
24853
24854 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24855 asm_fprintf (stream, "[%r, #%s%d]!",
24856 REGNO (XEXP (x, 0)),
24857 GET_CODE (x) == PRE_DEC ? "-" : "",
24858 GET_MODE_SIZE (mode));
24859 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24860 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24861 else
24862 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24863 GET_CODE (x) == POST_DEC ? "-" : "",
24864 GET_MODE_SIZE (mode));
24865 }
24866 else if (GET_CODE (x) == PRE_MODIFY)
24867 {
24868 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24869 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24870 asm_fprintf (stream, "#%wd]!",
24871 INTVAL (XEXP (XEXP (x, 1), 1)));
24872 else
24873 asm_fprintf (stream, "%r]!",
24874 REGNO (XEXP (XEXP (x, 1), 1)));
24875 }
24876 else if (GET_CODE (x) == POST_MODIFY)
24877 {
24878 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24879 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24880 asm_fprintf (stream, "#%wd",
24881 INTVAL (XEXP (XEXP (x, 1), 1)));
24882 else
24883 asm_fprintf (stream, "%r",
24884 REGNO (XEXP (XEXP (x, 1), 1)));
24885 }
24886 else output_addr_const (stream, x);
24887 }
24888 else
24889 {
24890 if (REG_P (x))
24891 asm_fprintf (stream, "[%r]", REGNO (x));
24892 else if (GET_CODE (x) == POST_INC)
24893 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24894 else if (GET_CODE (x) == PLUS)
24895 {
24896 gcc_assert (REG_P (XEXP (x, 0)));
24897 if (CONST_INT_P (XEXP (x, 1)))
24898 asm_fprintf (stream, "[%r, #%wd]",
24899 REGNO (XEXP (x, 0)),
24900 INTVAL (XEXP (x, 1)));
24901 else
24902 asm_fprintf (stream, "[%r, %r]",
24903 REGNO (XEXP (x, 0)),
24904 REGNO (XEXP (x, 1)));
24905 }
24906 else
24907 output_addr_const (stream, x);
24908 }
24909 }
24910 \f
24911 /* Target hook for indicating whether a punctuation character for
24912 TARGET_PRINT_OPERAND is valid. */
24913 static bool
24914 arm_print_operand_punct_valid_p (unsigned char code)
24915 {
24916 return (code == '@' || code == '|' || code == '.'
24917 || code == '(' || code == ')' || code == '#'
24918 || (TARGET_32BIT && (code == '?'))
24919 || (TARGET_THUMB2 && (code == '!'))
24920 || (TARGET_THUMB && (code == '_')));
24921 }
24922 \f
24923 /* Target hook for assembling integer objects. The ARM version needs to
24924 handle word-sized values specially. */
24925 static bool
24926 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24927 {
24928 machine_mode mode;
24929
24930 if (size == UNITS_PER_WORD && aligned_p)
24931 {
24932 fputs ("\t.word\t", asm_out_file);
24933 output_addr_const (asm_out_file, x);
24934
24935 /* Mark symbols as position independent. We only do this in the
24936 .text segment, not in the .data segment. */
24937 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24938 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24939 {
24940 /* See legitimize_pic_address for an explanation of the
24941 TARGET_VXWORKS_RTP check. */
24942 /* References to weak symbols cannot be resolved locally:
24943 they may be overridden by a non-weak definition at link
24944 time. */
24945 if (!arm_pic_data_is_text_relative
24946 || (SYMBOL_REF_P (x)
24947 && (!SYMBOL_REF_LOCAL_P (x)
24948 || (SYMBOL_REF_DECL (x)
24949 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24950 || (SYMBOL_REF_FUNCTION_P (x)
24951 && !arm_fdpic_local_funcdesc_p (x)))))
24952 {
24953 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24954 fputs ("(GOTFUNCDESC)", asm_out_file);
24955 else
24956 fputs ("(GOT)", asm_out_file);
24957 }
24958 else
24959 {
24960 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24961 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24962 else
24963 {
24964 bool is_readonly;
24965
24966 if (!TARGET_FDPIC
24967 || arm_is_segment_info_known (x, &is_readonly))
24968 fputs ("(GOTOFF)", asm_out_file);
24969 else
24970 fputs ("(GOT)", asm_out_file);
24971 }
24972 }
24973 }
24974
24975 /* For FDPIC we also have to mark the symbol for the .data section. */
24976 if (TARGET_FDPIC
24977 && !making_const_table
24978 && SYMBOL_REF_P (x)
24979 && SYMBOL_REF_FUNCTION_P (x))
24980 fputs ("(FUNCDESC)", asm_out_file);
24981
24982 fputc ('\n', asm_out_file);
24983 return true;
24984 }
24985
24986 mode = GET_MODE (x);
24987
24988 if (arm_vector_mode_supported_p (mode))
24989 {
24990 int i, units;
24991
24992 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24993
24994 units = CONST_VECTOR_NUNITS (x);
24995 size = GET_MODE_UNIT_SIZE (mode);
24996
24997 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24998 for (i = 0; i < units; i++)
24999 {
25000 rtx elt = CONST_VECTOR_ELT (x, i);
25001 assemble_integer
25002 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25003 }
25004 else
25005 for (i = 0; i < units; i++)
25006 {
25007 rtx elt = CONST_VECTOR_ELT (x, i);
25008 assemble_real
25009 (*CONST_DOUBLE_REAL_VALUE (elt),
25010 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25011 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25012 }
25013
25014 return true;
25015 }
25016
25017 return default_assemble_integer (x, size, aligned_p);
25018 }
25019
25020 static void
25021 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25022 {
25023 section *s;
25024
25025 if (!TARGET_AAPCS_BASED)
25026 {
25027 (is_ctor ?
25028 default_named_section_asm_out_constructor
25029 : default_named_section_asm_out_destructor) (symbol, priority);
25030 return;
25031 }
25032
25033 /* Put these in the .init_array section, using a special relocation. */
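/* Worked example (illustrative only, not part of the original sources): a
   constructor registered with __attribute__((constructor (100))) lands in a
   section named ".init_array.00100" via the sprintf below, and the word
   emitted for it carries a (target1) annotation so the linker applies the
   R_ARM_TARGET1 relocation expected by AAPCS-based toolchains.  */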
25034 if (priority != DEFAULT_INIT_PRIORITY)
25035 {
25036 char buf[18];
25037 sprintf (buf, "%s.%.5u",
25038 is_ctor ? ".init_array" : ".fini_array",
25039 priority);
25040 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25041 }
25042 else if (is_ctor)
25043 s = ctors_section;
25044 else
25045 s = dtors_section;
25046
25047 switch_to_section (s);
25048 assemble_align (POINTER_SIZE);
25049 fputs ("\t.word\t", asm_out_file);
25050 output_addr_const (asm_out_file, symbol);
25051 fputs ("(target1)\n", asm_out_file);
25052 }
25053
25054 /* Add a function to the list of static constructors. */
25055
25056 static void
25057 arm_elf_asm_constructor (rtx symbol, int priority)
25058 {
25059 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25060 }
25061
25062 /* Add a function to the list of static destructors. */
25063
25064 static void
25065 arm_elf_asm_destructor (rtx symbol, int priority)
25066 {
25067 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25068 }
25069 \f
25070 /* A finite state machine takes care of noticing whether or not instructions
25071 can be conditionally executed, and thus decrease execution time and code
25072 size by deleting branch instructions. The fsm is controlled by
25073 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25074
25075 /* The state of the fsm controlling condition codes are:
25076 0: normal, do nothing special
25077 1: make ASM_OUTPUT_OPCODE not output this instruction
25078 2: make ASM_OUTPUT_OPCODE not output this instruction
25079 3: make instructions conditional
25080 4: make instructions conditional
25081
25082 State transitions (state->state by whom under condition):
25083 0 -> 1 final_prescan_insn if the `target' is a label
25084 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25085 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25086 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25087 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25088 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25089 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25090 (the target insn is arm_target_insn).
25091
25092 If the jump clobbers the conditions then we use states 2 and 4.
25093
25094 A similar thing can be done with conditional return insns.
25095
25096 XXX In case the `target' is an unconditional branch, this conditionalising
25097 of the instructions always reduces code size, but not always execution
25098 time. But then, I want to reduce the code size to somewhere near what
25099 /bin/cc produces. */
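/* Illustrative sketch (not part of the original sources): on typical input
   the FSM turns a branch around a single instruction, e.g.

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   by suppressing the branch (states 1/3) and conditionalising the skipped
   instruction with the inverse of the branch condition.  */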
25100
25101 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25102 instructions. When a COND_EXEC instruction is seen the subsequent
25103 instructions are scanned so that multiple conditional instructions can be
25104 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25105 specify the length and true/false mask for the IT block. These will be
25106 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
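/* Worked example (illustrative only): three COND_EXEC insns predicated
   EQ, EQ, NE give arm_condexec_masklen == 3 and arm_condexec_mask == 0b011,
   so thumb2_asm_output_opcode prints "itte eq" before the first
   instruction of the block.  */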
25107
25108 /* Returns the index of the ARM condition code string in
25109 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25110 COMPARISON should be an rtx like `(eq (...) (...))'. */
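/* For instance (illustrative only), a comparison such as
   (eq (reg:CC_NZ CC_REGNUM) (const_int 0)) selects the E_CC_NZmode case
   below and yields ARM_EQ, while an unordered FP comparison such as
   (uneq ...) has no single condition code and returns ARM_NV.  */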
25111
25112 enum arm_cond_code
25113 maybe_get_arm_condition_code (rtx comparison)
25114 {
25115 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25116 enum arm_cond_code code;
25117 enum rtx_code comp_code = GET_CODE (comparison);
25118
25119 if (GET_MODE_CLASS (mode) != MODE_CC)
25120 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25121 XEXP (comparison, 1));
25122
25123 switch (mode)
25124 {
25125 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25126 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25127 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25128 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25129 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25130 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25131 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25132 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25133 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25134 case E_CC_DLTUmode: code = ARM_CC;
25135
25136 dominance:
25137 if (comp_code == EQ)
25138 return ARM_INVERSE_CONDITION_CODE (code);
25139 if (comp_code == NE)
25140 return code;
25141 return ARM_NV;
25142
25143 case E_CC_NZmode:
25144 switch (comp_code)
25145 {
25146 case NE: return ARM_NE;
25147 case EQ: return ARM_EQ;
25148 case GE: return ARM_PL;
25149 case LT: return ARM_MI;
25150 default: return ARM_NV;
25151 }
25152
25153 case E_CC_Zmode:
25154 switch (comp_code)
25155 {
25156 case NE: return ARM_NE;
25157 case EQ: return ARM_EQ;
25158 default: return ARM_NV;
25159 }
25160
25161 case E_CC_Nmode:
25162 switch (comp_code)
25163 {
25164 case NE: return ARM_MI;
25165 case EQ: return ARM_PL;
25166 default: return ARM_NV;
25167 }
25168
25169 case E_CCFPEmode:
25170 case E_CCFPmode:
25171 /* We can handle all cases except UNEQ and LTGT. */
25172 switch (comp_code)
25173 {
25174 case GE: return ARM_GE;
25175 case GT: return ARM_GT;
25176 case LE: return ARM_LS;
25177 case LT: return ARM_MI;
25178 case NE: return ARM_NE;
25179 case EQ: return ARM_EQ;
25180 case ORDERED: return ARM_VC;
25181 case UNORDERED: return ARM_VS;
25182 case UNLT: return ARM_LT;
25183 case UNLE: return ARM_LE;
25184 case UNGT: return ARM_HI;
25185 case UNGE: return ARM_PL;
25186 /* UNEQ and LTGT do not have a representation. */
25187 case UNEQ: /* Fall through. */
25188 case LTGT: /* Fall through. */
25189 default: return ARM_NV;
25190 }
25191
25192 case E_CC_SWPmode:
25193 switch (comp_code)
25194 {
25195 case NE: return ARM_NE;
25196 case EQ: return ARM_EQ;
25197 case GE: return ARM_LE;
25198 case GT: return ARM_LT;
25199 case LE: return ARM_GE;
25200 case LT: return ARM_GT;
25201 case GEU: return ARM_LS;
25202 case GTU: return ARM_CC;
25203 case LEU: return ARM_CS;
25204 case LTU: return ARM_HI;
25205 default: return ARM_NV;
25206 }
25207
25208 case E_CC_Cmode:
25209 switch (comp_code)
25210 {
25211 case LTU: return ARM_CS;
25212 case GEU: return ARM_CC;
25213 default: return ARM_NV;
25214 }
25215
25216 case E_CC_NVmode:
25217 switch (comp_code)
25218 {
25219 case GE: return ARM_GE;
25220 case LT: return ARM_LT;
25221 default: return ARM_NV;
25222 }
25223
25224 case E_CC_Bmode:
25225 switch (comp_code)
25226 {
25227 case GEU: return ARM_CS;
25228 case LTU: return ARM_CC;
25229 default: return ARM_NV;
25230 }
25231
25232 case E_CC_Vmode:
25233 switch (comp_code)
25234 {
25235 case NE: return ARM_VS;
25236 case EQ: return ARM_VC;
25237 default: return ARM_NV;
25238 }
25239
25240 case E_CC_ADCmode:
25241 switch (comp_code)
25242 {
25243 case GEU: return ARM_CS;
25244 case LTU: return ARM_CC;
25245 default: return ARM_NV;
25246 }
25247
25248 case E_CCmode:
25249 case E_CC_RSBmode:
25250 switch (comp_code)
25251 {
25252 case NE: return ARM_NE;
25253 case EQ: return ARM_EQ;
25254 case GE: return ARM_GE;
25255 case GT: return ARM_GT;
25256 case LE: return ARM_LE;
25257 case LT: return ARM_LT;
25258 case GEU: return ARM_CS;
25259 case GTU: return ARM_HI;
25260 case LEU: return ARM_LS;
25261 case LTU: return ARM_CC;
25262 default: return ARM_NV;
25263 }
25264
25265 default: gcc_unreachable ();
25266 }
25267 }
25268
25269 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25270 static enum arm_cond_code
25271 get_arm_condition_code (rtx comparison)
25272 {
25273 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25274 gcc_assert (code != ARM_NV);
25275 return code;
25276 }
25277
25278 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25279 code registers when not targeting Thumb1. The VFP condition register
25280 only exists when generating hard-float code. */
25281 static bool
25282 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25283 {
25284 if (!TARGET_32BIT)
25285 return false;
25286
25287 *p1 = CC_REGNUM;
25288 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25289 return true;
25290 }
25291
25292 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25293 instructions. */
25294 void
25295 thumb2_final_prescan_insn (rtx_insn *insn)
25296 {
25297 rtx_insn *first_insn = insn;
25298 rtx body = PATTERN (insn);
25299 rtx predicate;
25300 enum arm_cond_code code;
25301 int n;
25302 int mask;
25303 int max;
25304
25305 /* max_insns_skipped in the tune was already taken into account in the
25306 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
25307 just emit the IT blocks as best we can. It does not make sense to split
25308 the IT blocks. */
25309 max = MAX_INSN_PER_IT_BLOCK;
25310
25311 /* Remove the previous insn from the count of insns to be output. */
25312 if (arm_condexec_count)
25313 arm_condexec_count--;
25314
25315 /* Nothing to do if we are already inside a conditional block. */
25316 if (arm_condexec_count)
25317 return;
25318
25319 if (GET_CODE (body) != COND_EXEC)
25320 return;
25321
25322 /* Conditional jumps are implemented directly. */
25323 if (JUMP_P (insn))
25324 return;
25325
25326 predicate = COND_EXEC_TEST (body);
25327 arm_current_cc = get_arm_condition_code (predicate);
25328
25329 n = get_attr_ce_count (insn);
25330 arm_condexec_count = 1;
25331 arm_condexec_mask = (1 << n) - 1;
25332 arm_condexec_masklen = n;
25333 /* See if subsequent instructions can be combined into the same block. */
25334 for (;;)
25335 {
25336 insn = next_nonnote_insn (insn);
25337
25338 /* Jumping into the middle of an IT block is illegal, so a label or
25339 barrier terminates the block. */
25340 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25341 break;
25342
25343 body = PATTERN (insn);
25344 /* USE and CLOBBER aren't really insns, so just skip them. */
25345 if (GET_CODE (body) == USE
25346 || GET_CODE (body) == CLOBBER)
25347 continue;
25348
25349 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25350 if (GET_CODE (body) != COND_EXEC)
25351 break;
25352 /* Maximum number of conditionally executed instructions in a block. */
25353 n = get_attr_ce_count (insn);
25354 if (arm_condexec_masklen + n > max)
25355 break;
25356
25357 predicate = COND_EXEC_TEST (body);
25358 code = get_arm_condition_code (predicate);
25359 mask = (1 << n) - 1;
25360 if (arm_current_cc == code)
25361 arm_condexec_mask |= (mask << arm_condexec_masklen);
25362 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25363 break;
25364
25365 arm_condexec_count++;
25366 arm_condexec_masklen += n;
25367
25368 /* A jump must be the last instruction in a conditional block. */
25369 if (JUMP_P (insn))
25370 break;
25371 }
25372 /* Restore recog_data (getting the attributes of other insns can
25373 destroy this array, but final.cc assumes that it remains intact
25374 across this call). */
25375 extract_constrain_insn_cached (first_insn);
25376 }
25377
25378 void
25379 arm_final_prescan_insn (rtx_insn *insn)
25380 {
25381 /* BODY will hold the body of INSN. */
25382 rtx body = PATTERN (insn);
25383
25384 /* This will be 1 if trying to repeat the trick, and things need to be
25385 reversed if it appears to fail. */
25386 int reverse = 0;
25387
25388 /* If we start with a return insn, we only succeed if we find another one. */
25389 int seeking_return = 0;
25390 enum rtx_code return_code = UNKNOWN;
25391
25392 /* START_INSN will hold the insn from where we start looking. This is the
25393 first insn after the following code_label if REVERSE is true. */
25394 rtx_insn *start_insn = insn;
25395
25396 /* If in state 4, check if the target branch is reached, in order to
25397 change back to state 0. */
25398 if (arm_ccfsm_state == 4)
25399 {
25400 if (insn == arm_target_insn)
25401 {
25402 arm_target_insn = NULL;
25403 arm_ccfsm_state = 0;
25404 }
25405 return;
25406 }
25407
25408 /* If in state 3, it is possible to repeat the trick, if this insn is an
25409 unconditional branch to a label, and immediately following this branch
25410 is the previous target label which is only used once, and the label this
25411 branch jumps to is not too far off. */
25412 if (arm_ccfsm_state == 3)
25413 {
25414 if (simplejump_p (insn))
25415 {
25416 start_insn = next_nonnote_insn (start_insn);
25417 if (BARRIER_P (start_insn))
25418 {
25419 /* XXX Isn't this always a barrier? */
25420 start_insn = next_nonnote_insn (start_insn);
25421 }
25422 if (LABEL_P (start_insn)
25423 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25424 && LABEL_NUSES (start_insn) == 1)
25425 reverse = TRUE;
25426 else
25427 return;
25428 }
25429 else if (ANY_RETURN_P (body))
25430 {
25431 start_insn = next_nonnote_insn (start_insn);
25432 if (BARRIER_P (start_insn))
25433 start_insn = next_nonnote_insn (start_insn);
25434 if (LABEL_P (start_insn)
25435 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25436 && LABEL_NUSES (start_insn) == 1)
25437 {
25438 reverse = TRUE;
25439 seeking_return = 1;
25440 return_code = GET_CODE (body);
25441 }
25442 else
25443 return;
25444 }
25445 else
25446 return;
25447 }
25448
25449 gcc_assert (!arm_ccfsm_state || reverse);
25450 if (!JUMP_P (insn))
25451 return;
25452
25453 /* This jump might be part of a PARALLEL with a clobber of the condition
25454 codes; the jump should always come first. */
25455 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25456 body = XVECEXP (body, 0, 0);
25457
25458 if (reverse
25459 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25460 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25461 {
25462 int insns_skipped;
25463 int fail = FALSE, succeed = FALSE;
25464 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25465 int then_not_else = TRUE;
25466 rtx_insn *this_insn = start_insn;
25467 rtx label = 0;
25468
25469 /* Register the insn jumped to. */
25470 if (reverse)
25471 {
25472 if (!seeking_return)
25473 label = XEXP (SET_SRC (body), 0);
25474 }
25475 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25476 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25477 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25478 {
25479 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25480 then_not_else = FALSE;
25481 }
25482 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25483 {
25484 seeking_return = 1;
25485 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25486 }
25487 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25488 {
25489 seeking_return = 1;
25490 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25491 then_not_else = FALSE;
25492 }
25493 else
25494 gcc_unreachable ();
25495
25496 /* See how many insns this branch skips, and what kind of insns. If all
25497 insns are okay, and the label or unconditional branch to the same
25498 label is not too far away, succeed. */
25499 for (insns_skipped = 0;
25500 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25501 {
25502 rtx scanbody;
25503
25504 this_insn = next_nonnote_insn (this_insn);
25505 if (!this_insn)
25506 break;
25507
25508 switch (GET_CODE (this_insn))
25509 {
25510 case CODE_LABEL:
25511 /* Succeed if it is the target label, otherwise fail since
25512 control falls in from somewhere else. */
25513 if (this_insn == label)
25514 {
25515 arm_ccfsm_state = 1;
25516 succeed = TRUE;
25517 }
25518 else
25519 fail = TRUE;
25520 break;
25521
25522 case BARRIER:
25523 /* Succeed if the following insn is the target label.
25524 Otherwise fail.
25525 If return insns are used then the last insn in a function
25526 will be a barrier. */
25527 this_insn = next_nonnote_insn (this_insn);
25528 if (this_insn && this_insn == label)
25529 {
25530 arm_ccfsm_state = 1;
25531 succeed = TRUE;
25532 }
25533 else
25534 fail = TRUE;
25535 break;
25536
25537 case CALL_INSN:
25538 /* The AAPCS says that conditional calls should not be
25539 used since they make interworking inefficient (the
25540 linker can't transform BL<cond> into BLX). That's
25541 only a problem if the machine has BLX. */
25542 if (arm_arch5t)
25543 {
25544 fail = TRUE;
25545 break;
25546 }
25547
25548 /* Succeed if the following insn is the target label, or
25549 if the following two insns are a barrier and the
25550 target label. */
25551 this_insn = next_nonnote_insn (this_insn);
25552 if (this_insn && BARRIER_P (this_insn))
25553 this_insn = next_nonnote_insn (this_insn);
25554
25555 if (this_insn && this_insn == label
25556 && insns_skipped < max_insns_skipped)
25557 {
25558 arm_ccfsm_state = 1;
25559 succeed = TRUE;
25560 }
25561 else
25562 fail = TRUE;
25563 break;
25564
25565 case JUMP_INSN:
25566 /* If this is an unconditional branch to the same label, succeed.
25567 If it is to another label, do nothing. If it is conditional,
25568 fail. */
25569 /* XXX Probably, the tests for SET and the PC are
25570 unnecessary. */
25571
25572 scanbody = PATTERN (this_insn);
25573 if (GET_CODE (scanbody) == SET
25574 && GET_CODE (SET_DEST (scanbody)) == PC)
25575 {
25576 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25577 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25578 {
25579 arm_ccfsm_state = 2;
25580 succeed = TRUE;
25581 }
25582 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25583 fail = TRUE;
25584 }
25585 /* Fail if a conditional return is undesirable (e.g. on a
25586 StrongARM), but still allow this if optimizing for size. */
25587 else if (GET_CODE (scanbody) == return_code
25588 && !use_return_insn (TRUE, NULL)
25589 && !optimize_size)
25590 fail = TRUE;
25591 else if (GET_CODE (scanbody) == return_code)
25592 {
25593 arm_ccfsm_state = 2;
25594 succeed = TRUE;
25595 }
25596 else if (GET_CODE (scanbody) == PARALLEL)
25597 {
25598 switch (get_attr_conds (this_insn))
25599 {
25600 case CONDS_NOCOND:
25601 break;
25602 default:
25603 fail = TRUE;
25604 break;
25605 }
25606 }
25607 else
25608 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25609
25610 break;
25611
25612 case INSN:
25613 /* Instructions using or affecting the condition codes make it
25614 fail. */
25615 scanbody = PATTERN (this_insn);
25616 if (!(GET_CODE (scanbody) == SET
25617 || GET_CODE (scanbody) == PARALLEL)
25618 || get_attr_conds (this_insn) != CONDS_NOCOND)
25619 fail = TRUE;
25620 break;
25621
25622 default:
25623 break;
25624 }
25625 }
25626 if (succeed)
25627 {
25628 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25629 arm_target_label = CODE_LABEL_NUMBER (label);
25630 else
25631 {
25632 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25633
25634 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25635 {
25636 this_insn = next_nonnote_insn (this_insn);
25637 gcc_assert (!this_insn
25638 || (!BARRIER_P (this_insn)
25639 && !LABEL_P (this_insn)));
25640 }
25641 if (!this_insn)
25642 {
25643 /* Oh, dear! We ran off the end; give up. */
25644 extract_constrain_insn_cached (insn);
25645 arm_ccfsm_state = 0;
25646 arm_target_insn = NULL;
25647 return;
25648 }
25649 arm_target_insn = this_insn;
25650 }
25651
25652 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25653 what it was. */
25654 if (!reverse)
25655 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25656
25657 if (reverse || then_not_else)
25658 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25659 }
25660
25661 /* Restore recog_data (getting the attributes of other insns can
25662 destroy this array, but final.cc assumes that it remains intact
25663 across this call). */
25664 extract_constrain_insn_cached (insn);
25665 }
25666 }
25667
25668 /* Output IT instructions. */
25669 void
25670 thumb2_asm_output_opcode (FILE * stream)
25671 {
25672 char buff[5];
25673 int n;
25674
25675 if (arm_condexec_mask)
25676 {
25677 for (n = 0; n < arm_condexec_masklen; n++)
25678 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25679 buff[n] = 0;
25680 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25681 arm_condition_codes[arm_current_cc]);
25682 arm_condexec_mask = 0;
25683 }
25684 }
25685
25686 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25687 UNITS_PER_WORD bytes wide. */
25688 static unsigned int
25689 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25690 {
25691 if (IS_VPR_REGNUM (regno))
25692 return CEIL (GET_MODE_SIZE (mode), 2);
25693
25694 if (TARGET_32BIT
25695 && regno > PC_REGNUM
25696 && regno != FRAME_POINTER_REGNUM
25697 && regno != ARG_POINTER_REGNUM
25698 && !IS_VFP_REGNUM (regno))
25699 return 1;
25700
25701 return ARM_NUM_REGS (mode);
25702 }
25703
25704 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25705 static bool
25706 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25707 {
25708 if (GET_MODE_CLASS (mode) == MODE_CC)
25709 return (regno == CC_REGNUM
25710 || (TARGET_VFP_BASE
25711 && regno == VFPCC_REGNUM));
25712
25713 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25714 return false;
25715
25716 if (IS_VPR_REGNUM (regno))
25717 return VALID_MVE_PRED_MODE (mode);
25718
25719 if (TARGET_THUMB1)
25720 /* For the Thumb we only allow values bigger than SImode in
25721 registers 0 - 6, so that there is always a second low
25722 register available to hold the upper part of the value.
25723 We probably ought to ensure that the register is the
25724 start of an even numbered register pair. */
25725 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25726
25727 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25728 {
25729 if (mode == DFmode || mode == DImode)
25730 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25731
25732 if (mode == HFmode || mode == BFmode || mode == HImode
25733 || mode == SFmode || mode == SImode)
25734 return VFP_REGNO_OK_FOR_SINGLE (regno);
25735
25736 if (TARGET_NEON)
25737 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25738 || (VALID_NEON_QREG_MODE (mode)
25739 && NEON_REGNO_OK_FOR_QUAD (regno))
25740 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25741 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25742 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25743 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25744 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25745 if (TARGET_HAVE_MVE)
25746 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25747 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25748 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25749
25750 return false;
25751 }
25752
25753 if (TARGET_REALLY_IWMMXT)
25754 {
25755 if (IS_IWMMXT_GR_REGNUM (regno))
25756 return mode == SImode;
25757
25758 if (IS_IWMMXT_REGNUM (regno))
25759 return VALID_IWMMXT_REG_MODE (mode);
25760 }
25761
25762 /* We allow almost any value to be stored in the general registers.
25763 Restrict doubleword quantities to even register pairs in ARM state
25764 so that we can use ldrd. The same restriction applies for MVE
25765 in order to support Armv8.1-M Mainline instructions.
25766 Do not allow very large Neon structure opaque modes in general
25767 registers; they would use too many. */
25768 if (regno <= LAST_ARM_REGNUM)
25769 {
25770 if (ARM_NUM_REGS (mode) > 4)
25771 return false;
25772
25773 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25774 return true;
25775
25776 return !((TARGET_LDRD || TARGET_CDE)
25777 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25778 }
25779
25780 if (regno == FRAME_POINTER_REGNUM
25781 || regno == ARG_POINTER_REGNUM)
25782 /* We only allow integers in the fake hard registers. */
25783 return GET_MODE_CLASS (mode) == MODE_INT;
25784
25785 return false;
25786 }
25787
25788 /* Implement TARGET_MODES_TIEABLE_P. */
25789
25790 static bool
25791 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25792 {
25793 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25794 return true;
25795
25796 if (TARGET_HAVE_MVE
25797 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25798 return true;
25799
25800 /* We specifically want to allow elements of "structure" modes to
25801 be tieable to the structure. This more general condition allows
25802 other rarer situations too. */
25803 if ((TARGET_NEON
25804 && (VALID_NEON_DREG_MODE (mode1)
25805 || VALID_NEON_QREG_MODE (mode1)
25806 || VALID_NEON_STRUCT_MODE (mode1))
25807 && (VALID_NEON_DREG_MODE (mode2)
25808 || VALID_NEON_QREG_MODE (mode2)
25809 || VALID_NEON_STRUCT_MODE (mode2)))
25810 || (TARGET_HAVE_MVE
25811 && (VALID_MVE_MODE (mode1)
25812 || VALID_MVE_STRUCT_MODE (mode1))
25813 && (VALID_MVE_MODE (mode2)
25814 || VALID_MVE_STRUCT_MODE (mode2))))
25815 return true;
25816
25817 return false;
25818 }
25819
25820 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25821 not used in arm mode. */
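/* For instance (illustrative only): in Arm state r0-r14 all map to
   GENERAL_REGS, whereas under Thumb-2 r0-r7 map to LO_REGS and r8-r14 to
   HI_REGS; PC_REGNUM is always NO_REGS.  */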
25822
25823 enum reg_class
25824 arm_regno_class (int regno)
25825 {
25826 if (regno == PC_REGNUM)
25827 return NO_REGS;
25828
25829 if (IS_VPR_REGNUM (regno))
25830 return VPR_REG;
25831
25832 if (IS_PAC_REGNUM (regno))
25833 return PAC_REG;
25834
25835 if (TARGET_THUMB1)
25836 {
25837 if (regno == STACK_POINTER_REGNUM)
25838 return STACK_REG;
25839 if (regno == CC_REGNUM)
25840 return CC_REG;
25841 if (regno < 8)
25842 return LO_REGS;
25843 return HI_REGS;
25844 }
25845
25846 if (TARGET_THUMB2 && regno < 8)
25847 return LO_REGS;
25848
25849 if ( regno <= LAST_ARM_REGNUM
25850 || regno == FRAME_POINTER_REGNUM
25851 || regno == ARG_POINTER_REGNUM)
25852 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25853
25854 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25855 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25856
25857 if (IS_VFP_REGNUM (regno))
25858 {
25859 if (regno <= D7_VFP_REGNUM)
25860 return VFP_D0_D7_REGS;
25861 else if (regno <= LAST_LO_VFP_REGNUM)
25862 return VFP_LO_REGS;
25863 else
25864 return VFP_HI_REGS;
25865 }
25866
25867 if (IS_IWMMXT_REGNUM (regno))
25868 return IWMMXT_REGS;
25869
25870 if (IS_IWMMXT_GR_REGNUM (regno))
25871 return IWMMXT_GR_REGS;
25872
25873 return NO_REGS;
25874 }
25875
25876 /* Handle a special case when computing the offset
25877 of an argument from the frame pointer. */
25878 int
25879 arm_debugger_arg_offset (int value, rtx addr)
25880 {
25881 rtx_insn *insn;
25882
25883 /* We are only interested if dbxout_parms() failed to compute the offset. */
25884 if (value != 0)
25885 return 0;
25886
25887 /* We can only cope with the case where the address is held in a register. */
25888 if (!REG_P (addr))
25889 return 0;
25890
25891 /* If we are using the frame pointer to point at the argument, then
25892 an offset of 0 is correct. */
25893 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25894 return 0;
25895
25896 /* If we are using the stack pointer to point at the
25897 argument, then an offset of 0 is correct. */
25898 /* ??? Check this is consistent with thumb2 frame layout. */
25899 if ((TARGET_THUMB || !frame_pointer_needed)
25900 && REGNO (addr) == SP_REGNUM)
25901 return 0;
25902
25903 /* Oh dear. The argument is pointed to by a register rather
25904 than being held in a register, or being stored at a known
25905 offset from the frame pointer. Since GDB only understands
25906 those two kinds of argument we must translate the address
25907 held in the register into an offset from the frame pointer.
25908 We do this by searching through the insns for the function
25909 looking to see where this register gets its value. If the
25910 register is initialized from the frame pointer plus an offset
25911 then we are in luck and we can continue, otherwise we give up.
25912
25913 This code is exercised by producing debugging information
25914 for a function with arguments like this:
25915
25916 double func (double a, double b, int c, double d) {return d;}
25917
25918 Without this code the stab for parameter 'd' will be set to
25919 an offset of 0 from the frame pointer, rather than 8. */
25920
25921 /* The if() statement says:
25922
25923 If the insn is a normal instruction
25924 and if the insn is setting the value in a register
25925 and if the register being set is the register holding the address of the argument
25926 and if the address is computed by an addition
25927 that involves adding to a register
25928 which is the frame pointer
25929 a constant integer
25930
25931 then... */
25932
25933 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25934 {
25935 if ( NONJUMP_INSN_P (insn)
25936 && GET_CODE (PATTERN (insn)) == SET
25937 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25938 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25939 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25940 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25941 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25942 )
25943 {
25944 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25945
25946 break;
25947 }
25948 }
25949
25950 if (value == 0)
25951 {
25952 debug_rtx (addr);
25953 warning (0, "unable to compute real location of stacked parameter");
25954 value = 8; /* XXX magic hack */
25955 }
25956
25957 return value;
25958 }
25959 \f
25960 /* Implement TARGET_PROMOTED_TYPE. */
25961
25962 static tree
25963 arm_promoted_type (const_tree t)
25964 {
25965 if (SCALAR_FLOAT_TYPE_P (t)
25966 && TYPE_PRECISION (t) == 16
25967 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25968 return float_type_node;
25969 return NULL_TREE;
25970 }
25971
25972 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25973 This simply adds HFmode as a supported mode; even though we don't
25974 implement arithmetic on this type directly, it's supported by
25975 optabs conversions, much the way the double-word arithmetic is
25976 special-cased in the default hook. */
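/* For example (illustrative only): with -mfp16-format=none the __fp16 type
   is unavailable and HFmode is rejected here, while fixed-point modes such
   as SQmode are always accepted.  */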
25977
25978 static bool
25979 arm_scalar_mode_supported_p (scalar_mode mode)
25980 {
25981 if (mode == HFmode)
25982 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25983 else if (ALL_FIXED_POINT_MODE_P (mode))
25984 return true;
25985 else
25986 return default_scalar_mode_supported_p (mode);
25987 }
25988
25989 /* Set the value of FLT_EVAL_METHOD.
25990 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25991
25992 0: evaluate all operations and constants, whose semantic type has at
25993 most the range and precision of type float, to the range and
25994 precision of float; evaluate all other operations and constants to
25995 the range and precision of the semantic type;
25996
25997 N, where _FloatN is a supported interchange floating type
25998 evaluate all operations and constants, whose semantic type has at
25999 most the range and precision of _FloatN type, to the range and
26000 precision of the _FloatN type; evaluate all other operations and
26001 constants to the range and precision of the semantic type;
26002
26003 If we have the ARMv8.2-A extensions then we support _Float16 in native
26004 precision, so we should set this to 16. Otherwise, we support the type,
26005 but want to evaluate expressions in float precision, so set this to
26006 0. */
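/* A minimal sketch of the effect (the flags are illustrative assumptions):
   with -mfp16-format=ieee but without the Armv8.2-A FP16 instructions
   (+fp16), the multiply below is performed in 'float' and rounded back on
   return (FLT_EVAL_METHOD == 0); with -march=armv8.2-a+fp16 it is performed
   directly in _Float16 (FLT_EVAL_METHOD == 16).

       _Float16 scale (_Float16 x, _Float16 y) { return x * y; }  */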
26007
26008 static enum flt_eval_method
26009 arm_excess_precision (enum excess_precision_type type)
26010 {
26011 switch (type)
26012 {
26013 case EXCESS_PRECISION_TYPE_FAST:
26014 case EXCESS_PRECISION_TYPE_STANDARD:
26015 /* We can calculate either in 16-bit range and precision or
26016 32-bit range and precision. Make that decision based on whether
26017 we have native support for the ARMv8.2-A 16-bit floating-point
26018 instructions or not. */
26019 return (TARGET_VFP_FP16INST
26020 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26021 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26022 case EXCESS_PRECISION_TYPE_IMPLICIT:
26023 case EXCESS_PRECISION_TYPE_FLOAT16:
26024 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26025 default:
26026 gcc_unreachable ();
26027 }
26028 return FLT_EVAL_METHOD_UNPREDICTABLE;
26029 }
26030
26031
26032 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26033 _Float16 if we are using anything other than ieee format for 16-bit
26034 floating point. Otherwise, punt to the default implementation. */
26035 static opt_scalar_float_mode
26036 arm_floatn_mode (int n, bool extended)
26037 {
26038 if (!extended && n == 16)
26039 {
26040 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26041 return HFmode;
26042 return opt_scalar_float_mode ();
26043 }
26044
26045 return default_floatn_mode (n, extended);
26046 }
26047
26048
26049 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26050 not to early-clobber SRC registers in the process.
26051
26052 We assume that the operands described by SRC and DEST represent a
26053 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26054 number of components into which the copy has been decomposed. */
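/* Sketch of the overlap case (illustrative only): for a two-register copy
   where DEST is { d1, d2 } and SRC is { d0, d1 }, emitting d1 := d0 first
   would clobber the d1 that still has to be read, so the loop below
   reverses the order and emits d2 := d1 followed by d1 := d0.  */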
26055 void
26056 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26057 {
26058 unsigned int i;
26059
26060 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26061 || REGNO (operands[0]) < REGNO (operands[1]))
26062 {
26063 for (i = 0; i < count; i++)
26064 {
26065 operands[2 * i] = dest[i];
26066 operands[2 * i + 1] = src[i];
26067 }
26068 }
26069 else
26070 {
26071 for (i = 0; i < count; i++)
26072 {
26073 operands[2 * i] = dest[count - i - 1];
26074 operands[2 * i + 1] = src[count - i - 1];
26075 }
26076 }
26077 }
26078
26079 /* Split operands into moves from op[1] + op[2] into op[0]. */
26080
26081 void
26082 neon_split_vcombine (rtx operands[3])
26083 {
26084 unsigned int dest = REGNO (operands[0]);
26085 unsigned int src1 = REGNO (operands[1]);
26086 unsigned int src2 = REGNO (operands[2]);
26087 machine_mode halfmode = GET_MODE (operands[1]);
26088 unsigned int halfregs = REG_NREGS (operands[1]);
26089 rtx destlo, desthi;
26090
26091 if (src1 == dest && src2 == dest + halfregs)
26092 {
26093 /* No-op move. Can't split to nothing; emit something. */
26094 emit_note (NOTE_INSN_DELETED);
26095 return;
26096 }
26097
26098 /* Preserve register attributes for variable tracking. */
26099 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26100 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26101 GET_MODE_SIZE (halfmode));
26102
26103 /* Special case of reversed high/low parts. Use VSWP. */
26104 if (src2 == dest && src1 == dest + halfregs)
26105 {
26106 rtx x = gen_rtx_SET (destlo, operands[1]);
26107 rtx y = gen_rtx_SET (desthi, operands[2]);
26108 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26109 return;
26110 }
26111
26112 if (!reg_overlap_mentioned_p (operands[2], destlo))
26113 {
26114 /* Try to avoid unnecessary moves if part of the result
26115 is in the right place already. */
26116 if (src1 != dest)
26117 emit_move_insn (destlo, operands[1]);
26118 if (src2 != dest + halfregs)
26119 emit_move_insn (desthi, operands[2]);
26120 }
26121 else
26122 {
26123 if (src2 != dest + halfregs)
26124 emit_move_insn (desthi, operands[2]);
26125 if (src1 != dest)
26126 emit_move_insn (destlo, operands[1]);
26127 }
26128 }
26129 \f
26130 /* Return the number (counting from 0) of
26131 the least significant set bit in MASK. */
26132
26133 inline static int
26134 number_of_first_bit_set (unsigned mask)
26135 {
26136 return ctz_hwi (mask);
26137 }
26138
26139 /* Like emit_multi_reg_push, but allowing for a different set of
26140 registers to be described as saved. MASK is the set of registers
26141 to be saved; REAL_REGS is the set of registers to be described as
26142 saved. If REAL_REGS is 0, only describe the stack adjustment. */
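/* Example (illustrative only): MASK == (1 << 4) | (1 << LR_REGNUM) emits
   "push {r4, lr}"; if REAL_REGS is the same mask the unwind note records
   both stores, whereas REAL_REGS == 0 records only the 8-byte stack
   adjustment.  */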
26143
26144 static rtx_insn *
26145 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26146 {
26147 unsigned long regno;
26148 rtx par[10], tmp, reg;
26149 rtx_insn *insn;
26150 int i, j;
26151
26152 /* Build the parallel of the registers actually being stored. */
26153 for (i = 0; mask; ++i, mask &= mask - 1)
26154 {
26155 regno = ctz_hwi (mask);
26156 reg = gen_rtx_REG (SImode, regno);
26157
26158 if (i == 0)
26159 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26160 else
26161 tmp = gen_rtx_USE (VOIDmode, reg);
26162
26163 par[i] = tmp;
26164 }
26165
26166 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26167 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26168 tmp = gen_frame_mem (BLKmode, tmp);
26169 tmp = gen_rtx_SET (tmp, par[0]);
26170 par[0] = tmp;
26171
26172 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26173 insn = emit_insn (tmp);
26174
26175 /* Always build the stack adjustment note for unwind info. */
26176 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26177 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26178 par[0] = tmp;
26179
26180 /* Build the parallel of the registers recorded as saved for unwind. */
26181 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26182 {
26183 regno = ctz_hwi (real_regs);
26184 reg = gen_rtx_REG (SImode, regno);
26185
26186 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26187 tmp = gen_frame_mem (SImode, tmp);
26188 tmp = gen_rtx_SET (tmp, reg);
26189 RTX_FRAME_RELATED_P (tmp) = 1;
26190 par[j + 1] = tmp;
26191 }
26192
26193 if (j == 0)
26194 tmp = par[0];
26195 else
26196 {
26197 RTX_FRAME_RELATED_P (par[0]) = 1;
26198 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26199 }
26200
26201 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26202
26203 return insn;
26204 }
26205
26206 /* Emit code to push or pop registers to or from the stack. F is the
26207 assembly file. MASK is the registers to pop. */
26208 static void
26209 thumb_pop (FILE *f, unsigned long mask)
26210 {
26211 int regno;
26212 int lo_mask = mask & 0xFF;
26213
26214 gcc_assert (mask);
26215
26216 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26217 {
26218 /* Special case. Do not generate a POP PC statement here; do it in
26219 thumb_exit (). */
26220 thumb_exit (f, -1);
26221 return;
26222 }
26223
26224 fprintf (f, "\tpop\t{");
26225
26226 /* Look at the low registers first. */
26227 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26228 {
26229 if (lo_mask & 1)
26230 {
26231 asm_fprintf (f, "%r", regno);
26232
26233 if ((lo_mask & ~1) != 0)
26234 fprintf (f, ", ");
26235 }
26236 }
26237
26238 if (mask & (1 << PC_REGNUM))
26239 {
26240 /* Catch popping the PC. */
26241 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26242 || IS_CMSE_ENTRY (arm_current_func_type ()))
26243 {
26244 /* The PC is never popped directly; instead
26245 it is popped into r3 and then BX is used. */
26246 fprintf (f, "}\n");
26247
26248 thumb_exit (f, -1);
26249
26250 return;
26251 }
26252 else
26253 {
26254 if (mask & 0xFF)
26255 fprintf (f, ", ");
26256
26257 asm_fprintf (f, "%r", PC_REGNUM);
26258 }
26259 }
26260
26261 fprintf (f, "}\n");
26262 }
26263
26264 /* Generate code to return from a thumb function.
26265 If 'reg_containing_return_addr' is -1, then the return address is
26266 actually on the stack, at the stack pointer.
26267
26268 Note: do not forget to update the length attribute of the corresponding insn
26269 pattern when changing assembly output (e.g. the length attribute of epilogue_insns when
26270 updating Armv8-M Baseline Security Extensions register clearing
26271 sequences). */
26272 static void
26273 thumb_exit (FILE *f, int reg_containing_return_addr)
26274 {
26275 unsigned regs_available_for_popping;
26276 unsigned regs_to_pop;
26277 int pops_needed;
26278 unsigned available;
26279 unsigned required;
26280 machine_mode mode;
26281 int size;
26282 int restore_a4 = FALSE;
26283
26284 /* Compute the registers we need to pop. */
26285 regs_to_pop = 0;
26286 pops_needed = 0;
26287
26288 if (reg_containing_return_addr == -1)
26289 {
26290 regs_to_pop |= 1 << LR_REGNUM;
26291 ++pops_needed;
26292 }
26293
26294 if (TARGET_BACKTRACE)
26295 {
26296 /* Restore the (ARM) frame pointer and stack pointer. */
26297 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26298 pops_needed += 2;
26299 }
26300
26301 /* If there is nothing to pop then just emit the BX instruction and
26302 return. */
26303 if (pops_needed == 0)
26304 {
26305 if (crtl->calls_eh_return)
26306 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26307
26308 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26309 {
26310 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26311 emitted by cmse_nonsecure_entry_clear_before_return (). */
26312 if (!TARGET_HAVE_FPCXT_CMSE)
26313 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26314 reg_containing_return_addr);
26315 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26316 }
26317 else
26318 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26319 return;
26320 }
26321 /* Otherwise if we are not supporting interworking and we have not created
26322 a backtrace structure and the function was not entered in ARM mode then
26323 just pop the return address straight into the PC. */
26324 else if (!TARGET_INTERWORK
26325 && !TARGET_BACKTRACE
26326 && !is_called_in_ARM_mode (current_function_decl)
26327 && !crtl->calls_eh_return
26328 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26329 {
26330 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26331 return;
26332 }
26333
26334 /* Find out how many of the (return) argument registers we can corrupt. */
26335 regs_available_for_popping = 0;
26336
26337 /* If returning via __builtin_eh_return, the bottom three registers
26338 all contain information needed for the return. */
26339 if (crtl->calls_eh_return)
26340 size = 12;
26341 else
26342 {
26343 /* If possible, deduce the registers used from the function's
26344 return value. This is more reliable than examining
26345 df_regs_ever_live_p () because that will be set if the register is
26346 ever used in the function, not just if the register is used
26347 to hold a return value. */
26348
26349 if (crtl->return_rtx != 0)
26350 mode = GET_MODE (crtl->return_rtx);
26351 else
26352 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26353
26354 size = GET_MODE_SIZE (mode);
26355
26356 if (size == 0)
26357 {
26358 /* In a void function we can use any argument register.
26359 In a function that returns a structure on the stack
26360 we can use the second and third argument registers. */
26361 if (mode == VOIDmode)
26362 regs_available_for_popping =
26363 (1 << ARG_REGISTER (1))
26364 | (1 << ARG_REGISTER (2))
26365 | (1 << ARG_REGISTER (3));
26366 else
26367 regs_available_for_popping =
26368 (1 << ARG_REGISTER (2))
26369 | (1 << ARG_REGISTER (3));
26370 }
26371 else if (size <= 4)
26372 regs_available_for_popping =
26373 (1 << ARG_REGISTER (2))
26374 | (1 << ARG_REGISTER (3));
26375 else if (size <= 8)
26376 regs_available_for_popping =
26377 (1 << ARG_REGISTER (3));
26378 }
26379
26380 /* Match registers to be popped with registers into which we pop them. */
26381 for (available = regs_available_for_popping,
26382 required = regs_to_pop;
26383 required != 0 && available != 0;
26384 available &= ~(available & - available),
26385 required &= ~(required & - required))
26386 -- pops_needed;
26387
26388 /* If we have any popping registers left over, remove them. */
26389 if (available > 0)
26390 regs_available_for_popping &= ~available;
26391
26392 /* Otherwise if we need another popping register we can use
26393 the fourth argument register. */
26394 else if (pops_needed)
26395 {
26396 /* If we have not found any free argument registers and
26397 reg a4 contains the return address, we must move it. */
26398 if (regs_available_for_popping == 0
26399 && reg_containing_return_addr == LAST_ARG_REGNUM)
26400 {
26401 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26402 reg_containing_return_addr = LR_REGNUM;
26403 }
26404 else if (size > 12)
26405 {
26406 /* Register a4 is being used to hold part of the return value,
26407 but we have dire need of a free, low register. */
26408 restore_a4 = TRUE;
26409
26410 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26411 }
26412
26413 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26414 {
26415 /* The fourth argument register is available. */
26416 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26417
26418 --pops_needed;
26419 }
26420 }
26421
26422 /* Pop as many registers as we can. */
26423 thumb_pop (f, regs_available_for_popping);
26424
26425 /* Process the registers we popped. */
26426 if (reg_containing_return_addr == -1)
26427 {
26428 /* The return address was popped into the lowest numbered register. */
26429 regs_to_pop &= ~(1 << LR_REGNUM);
26430
26431 reg_containing_return_addr =
26432 number_of_first_bit_set (regs_available_for_popping);
26433
26434 /* Remove this register from the mask of available registers, so that
26435 the return address will not be corrupted by further pops. */
26436 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26437 }
26438
26439 /* If we popped other registers then handle them here. */
26440 if (regs_available_for_popping)
26441 {
26442 int frame_pointer;
26443
26444 /* Work out which register currently contains the frame pointer. */
26445 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26446
26447 /* Move it into the correct place. */
26448 asm_fprintf (f, "\tmov\t%r, %r\n",
26449 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26450
26451 /* (Temporarily) remove it from the mask of popped registers. */
26452 regs_available_for_popping &= ~(1 << frame_pointer);
26453 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26454
26455 if (regs_available_for_popping)
26456 {
26457 int stack_pointer;
26458
26459 /* We popped the stack pointer as well;
26460 find the register that contains it. */
26461 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26462
26463 /* Move it into the stack register. */
26464 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26465
26466 /* At this point we have popped all necessary registers, so
26467 do not worry about restoring regs_available_for_popping
26468 to its correct value:
26469
26470 assert (pops_needed == 0)
26471 assert (regs_available_for_popping == (1 << frame_pointer))
26472 assert (regs_to_pop == (1 << STACK_POINTER)) */
26473 }
26474 else
26475 {
26476 /* Since we have just moved the popped value into the frame
26477 pointer, the popping register is available for reuse, and
26478 we know that we still have the stack pointer left to pop. */
26479 regs_available_for_popping |= (1 << frame_pointer);
26480 }
26481 }
26482
26483 /* If we still have registers left on the stack, but we no longer have
26484 any registers into which we can pop them, then we must move the return
26485 address into the link register and make available the register that
26486 contained it. */
26487 if (regs_available_for_popping == 0 && pops_needed > 0)
26488 {
26489 regs_available_for_popping |= 1 << reg_containing_return_addr;
26490
26491 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26492 reg_containing_return_addr);
26493
26494 reg_containing_return_addr = LR_REGNUM;
26495 }
26496
26497 /* If we have registers left on the stack then pop some more.
26498 We know that at most we will want to pop FP and SP. */
26499 if (pops_needed > 0)
26500 {
26501 int popped_into;
26502 int move_to;
26503
26504 thumb_pop (f, regs_available_for_popping);
26505
26506 /* We have popped either FP or SP.
26507 Move whichever one it is into the correct register. */
26508 popped_into = number_of_first_bit_set (regs_available_for_popping);
26509 move_to = number_of_first_bit_set (regs_to_pop);
26510
26511 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26512 --pops_needed;
26513 }
26514
26515 /* If we still have not popped everything then we must have only
26516 had one register available to us and we are now popping the SP. */
26517 if (pops_needed > 0)
26518 {
26519 int popped_into;
26520
26521 thumb_pop (f, regs_available_for_popping);
26522
26523 popped_into = number_of_first_bit_set (regs_available_for_popping);
26524
26525 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26526 /*
26527 assert (regs_to_pop == (1 << STACK_POINTER))
26528 assert (pops_needed == 1)
26529 */
26530 }
26531
26532 /* If necessary restore the a4 register. */
26533 if (restore_a4)
26534 {
26535 if (reg_containing_return_addr != LR_REGNUM)
26536 {
26537 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26538 reg_containing_return_addr = LR_REGNUM;
26539 }
26540
26541 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26542 }
26543
26544 if (crtl->calls_eh_return)
26545 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26546
26547 /* Return to caller. */
26548 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26549 {
26550 /* This is for the cases where LR is not being used to contain the return
26551 address. It may therefore contain information that we might not want
26552 to leak, hence it must be cleared. The value in R0 will never be a
26553 secret at this point, so it is safe to use it, see the clearing code
26554 in cmse_nonsecure_entry_clear_before_return (). */
26555 if (reg_containing_return_addr != LR_REGNUM)
26556 asm_fprintf (f, "\tmov\tlr, r0\n");
26557
26558 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26559 by cmse_nonsecure_entry_clear_before_return (). */
26560 if (!TARGET_HAVE_FPCXT_CMSE)
26561 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26562 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26563 }
26564 else
26565 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26566 }
26567 \f
26568 /* Scan INSN just before assembler is output for it.
26569 For Thumb-1, we track the status of the condition codes; this
26570 information is used in the cbranchsi4_insn pattern. */
26571 void
26572 thumb1_final_prescan_insn (rtx_insn *insn)
26573 {
26574 if (flag_print_asm_name)
26575 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26576 INSN_ADDRESSES (INSN_UID (insn)));
26577 /* Don't overwrite the previous setter when we get to a cbranch. */
26578 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26579 {
26580 enum attr_conds conds;
26581
26582 if (cfun->machine->thumb1_cc_insn)
26583 {
26584 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26585 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26586 CC_STATUS_INIT;
26587 }
26588 conds = get_attr_conds (insn);
26589 if (conds == CONDS_SET)
26590 {
26591 rtx set = single_set (insn);
26592 cfun->machine->thumb1_cc_insn = insn;
26593 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26594 cfun->machine->thumb1_cc_op1 = const0_rtx;
26595 cfun->machine->thumb1_cc_mode = CC_NZmode;
26596 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26597 {
26598 rtx src1 = XEXP (SET_SRC (set), 1);
26599 if (src1 == const0_rtx)
26600 cfun->machine->thumb1_cc_mode = CCmode;
26601 }
26602 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26603 {
26604 /* Record the src register operand instead of dest because
26605 cprop_hardreg pass propagates src. */
26606 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26607 }
26608 }
26609 else if (conds != CONDS_NOCOND)
26610 cfun->machine->thumb1_cc_insn = NULL_RTX;
26611 }
26612
26613 /* Check if unexpected far jump is used. */
26614 if (cfun->machine->lr_save_eliminated
26615 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26616 internal_error("Unexpected thumb1 far jump");
26617 }
26618
26619 int
26620 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26621 {
26622 unsigned HOST_WIDE_INT mask = 0xff;
26623 int i;
26624
26625 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26626 if (val == 0) /* XXX */
26627 return 0;
26628
26629 for (i = 0; i < 25; i++)
26630 if ((val & (mask << i)) == val)
26631 return 1;
26632
26633 return 0;
26634 }
26635
26636 /* Returns nonzero if the current function contains,
26637 or might contain a far jump. */
26638 static int
26639 thumb_far_jump_used_p (void)
26640 {
26641 rtx_insn *insn;
26642 bool far_jump = false;
26643 unsigned int func_size = 0;
26644
26645 /* If we have already decided that far jumps may be used,
26646 do not bother checking again, and always return true even if
26647 it turns out that they are not being used. Once we have made
26648 the decision that far jumps are present (and that hence the link
26649 register will be pushed onto the stack) we cannot go back on it. */
26650 if (cfun->machine->far_jump_used)
26651 return 1;
26652
26653 /* If this function is not being called from the prologue/epilogue
26654 generation code then it must be being called from the
26655 INITIAL_ELIMINATION_OFFSET macro. */
26656 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26657 {
26658 /* In this case we know that we are being asked about the elimination
26659 of the arg pointer register. If that register is not being used,
26660 then there are no arguments on the stack, and we do not have to
26661 worry that a far jump might force the prologue to push the link
26662 register, changing the stack offsets. In this case we can just
26663 return false, since the presence of far jumps in the function will
26664 not affect stack offsets.
26665
26666 If the arg pointer is live (or if it was live, but has now been
26667 eliminated and so set to dead) then we do have to test to see if
26668 the function might contain a far jump. This test can lead to some
26669 false negatives, since before reload is completed, the length of
26670 branch instructions is not known, so gcc defaults to returning their
26671 longest length, which in turn sets the far jump attribute to true.
26672
26673 A false negative will not result in bad code being generated, but it
26674 will result in a needless push and pop of the link register. We
26675 hope that this does not occur too often.
26676
26677 If we need doubleword stack alignment this could affect the other
26678 elimination offsets so we can't risk getting it wrong. */
26679 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26680 cfun->machine->arg_pointer_live = 1;
26681 else if (!cfun->machine->arg_pointer_live)
26682 return 0;
26683 }
26684
26685 /* We should not change far_jump_used during or after reload, as there is
26686 no chance to change stack frame layout. */
26687 if (reload_in_progress || reload_completed)
26688 return 0;
26689
26690 /* Check to see if the function contains a branch
26691 insn with the far jump attribute set. */
26692 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26693 {
26694 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26695 {
26696 far_jump = true;
26697 }
26698 func_size += get_attr_length (insn);
26699 }
26700
26701 /* Attribute far_jump will always be true for thumb1 before
26702 shorten_branch pass. So checking far_jump attribute before
26703 shorten_branch isn't very useful.
26704
26705 The following heuristic tries to estimate more accurately whether a far jump
26706 may finally be used. The heuristic is very conservative, as there is
26707 no chance to roll back the decision not to use a far jump.
26708
26709 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26710    2-byte insn is associated with a 4-byte constant pool entry. Using
26711 function size 2048/3 as the threshold is conservative enough. */
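   /* In other words: with the factor-of-3 worst case above, far jumps are
      assumed as soon as the summed insn lengths reach about 683 bytes,
      the smallest size for which size * 3 >= 2048, which could exceed the
      2046-byte forward reach of a Thumb-1 branch.  */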
26712 if (far_jump)
26713 {
26714 if ((func_size * 3) >= 2048)
26715 {
26716 /* Record the fact that we have decided that
26717 the function does use far jumps. */
26718 cfun->machine->far_jump_used = 1;
26719 return 1;
26720 }
26721 }
26722
26723 return 0;
26724 }
26725
26726 /* Return nonzero if FUNC must be entered in ARM mode. */
26727 static bool
26728 is_called_in_ARM_mode (tree func)
26729 {
26730 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26731
26732 /* Ignore the problem about functions whose address is taken. */
26733 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26734 return true;
26735
26736 #ifdef ARM_PE
26737 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26738 #else
26739 return false;
26740 #endif
26741 }
26742
26743 /* Given the stack offsets and register mask in OFFSETS, decide how
26744 many additional registers to push instead of subtracting a constant
26745 from SP. For epilogues the principle is the same except we use pop.
26746 FOR_PROLOGUE indicates which we're generating. */
26747 static int
26748 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26749 {
26750 HOST_WIDE_INT amount;
26751 unsigned long live_regs_mask = offsets->saved_regs_mask;
26752 /* Extract a mask of the ones we can give to the Thumb's push/pop
26753 instruction. */
26754 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26755 /* Then count how many other high registers will need to be pushed. */
26756 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26757 int n_free, reg_base, size;
26758
26759 if (!for_prologue && frame_pointer_needed)
26760 amount = offsets->locals_base - offsets->saved_regs;
26761 else
26762 amount = offsets->outgoing_args - offsets->saved_regs;
26763
26764 /* If the stack frame size is 512 exactly, we can save one load
26765 instruction, which should make this a win even when optimizing
26766 for speed. */
26767 if (!optimize_size && amount != 512)
26768 return 0;
26769
26770 /* Can't do this if there are high registers to push. */
26771 if (high_regs_pushed != 0)
26772 return 0;
26773
26774 /* Shouldn't do it in the prologue if no registers would normally
26775 be pushed at all. In the epilogue, also allow it if we'll have
26776 a pop insn for the PC. */
26777 if (l_mask == 0
26778 && (for_prologue
26779 || TARGET_BACKTRACE
26780 || (live_regs_mask & 1 << LR_REGNUM) == 0
26781 || TARGET_INTERWORK
26782 || crtl->args.pretend_args_size != 0))
26783 return 0;
26784
26785 /* Don't do this if thumb_expand_prologue wants to emit instructions
26786 between the push and the stack frame allocation. */
26787 if (for_prologue
26788 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26789 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26790 return 0;
26791
26792 reg_base = 0;
26793 n_free = 0;
26794 if (!for_prologue)
26795 {
26796 size = arm_size_return_regs ();
26797 reg_base = ARM_NUM_INTS (size);
26798 live_regs_mask >>= reg_base;
26799 }
26800
26801 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26802 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26803 {
26804 live_regs_mask >>= 1;
26805 n_free++;
26806 }
26807
26808 if (n_free == 0)
26809 return 0;
26810 gcc_assert (amount / 4 * 4 == amount);
26811
26812 if (amount >= 512 && (amount - n_free * 4) < 512)
26813 return (amount - 508) / 4;
26814 if (amount <= n_free * 4)
26815 return amount / 4;
26816 return 0;
26817 }
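/* Worked example (only relevant when optimizing for size): with
   AMOUNT == 516 and two suitable free low registers (n_free == 2), the
   function above returns (516 - 508) / 4 == 2, so two extra registers are
   pushed/popped and the remaining adjustment of 508 bytes still fits a
   single add/sub immediate.  When not optimizing for size only
   AMOUNT == 512 qualifies, as the early exit above shows.  */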
26818
26819 /* The bits which aren't usefully expanded as rtl. */
26820 const char *
26821 thumb1_unexpanded_epilogue (void)
26822 {
26823 arm_stack_offsets *offsets;
26824 int regno;
26825 unsigned long live_regs_mask = 0;
26826 int high_regs_pushed = 0;
26827 int extra_pop;
26828 int had_to_push_lr;
26829 int size;
26830
26831 if (cfun->machine->return_used_this_function != 0)
26832 return "";
26833
26834 if (IS_NAKED (arm_current_func_type ()))
26835 return "";
26836
26837 offsets = arm_get_frame_offsets ();
26838 live_regs_mask = offsets->saved_regs_mask;
26839 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26840
26841   /* We can deduce the registers used from the function's return value.
26842      This is more reliable than examining df_regs_ever_live_p () because that
26843 will be set if the register is ever used in the function, not just if
26844 the register is used to hold a return value. */
26845 size = arm_size_return_regs ();
26846
26847 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26848 if (extra_pop > 0)
26849 {
26850 unsigned long extra_mask = (1 << extra_pop) - 1;
26851 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26852 }
26853
26854 /* The prolog may have pushed some high registers to use as
26855 work registers. e.g. the testsuite file:
26856 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26857 compiles to produce:
26858 push {r4, r5, r6, r7, lr}
26859 mov r7, r9
26860 mov r6, r8
26861 push {r6, r7}
26862 as part of the prolog. We have to undo that pushing here. */
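  /* For the example above, the undo sequence generated below ends up
     looking roughly like
	pop	{r2, r3}
	mov	r9, r3
	mov	r8, r2
     although the exact low registers used depend on the mask computed
     below.  */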
26863
26864 if (high_regs_pushed)
26865 {
26866 unsigned long mask = live_regs_mask & 0xff;
26867 int next_hi_reg;
26868
26869 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26870
26871 if (mask == 0)
26872 /* Oh dear! We have no low registers into which we can pop
26873 high registers! */
26874 internal_error
26875 ("no low registers available for popping high registers");
26876
26877 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26878 if (live_regs_mask & (1 << next_hi_reg))
26879 break;
26880
26881 while (high_regs_pushed)
26882 {
26883 /* Find lo register(s) into which the high register(s) can
26884 be popped. */
26885 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26886 {
26887 if (mask & (1 << regno))
26888 high_regs_pushed--;
26889 if (high_regs_pushed == 0)
26890 break;
26891 }
26892
26893 if (high_regs_pushed == 0 && regno >= 0)
26894 mask &= ~((1 << regno) - 1);
26895
26896 /* Pop the values into the low register(s). */
26897 thumb_pop (asm_out_file, mask);
26898
26899 /* Move the value(s) into the high registers. */
26900 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26901 {
26902 if (mask & (1 << regno))
26903 {
26904 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26905 regno);
26906
26907 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26908 next_hi_reg--)
26909 if (live_regs_mask & (1 << next_hi_reg))
26910 break;
26911 }
26912 }
26913 }
26914 live_regs_mask &= ~0x0f00;
26915 }
26916
26917 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26918 live_regs_mask &= 0xff;
26919
26920 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26921 {
26922 /* Pop the return address into the PC. */
26923 if (had_to_push_lr)
26924 live_regs_mask |= 1 << PC_REGNUM;
26925
26926 /* Either no argument registers were pushed or a backtrace
26927 structure was created which includes an adjusted stack
26928 pointer, so just pop everything. */
26929 if (live_regs_mask)
26930 thumb_pop (asm_out_file, live_regs_mask);
26931
26932 /* We have either just popped the return address into the
26933 	 PC or it was kept in LR for the entire function.
26934 Note that thumb_pop has already called thumb_exit if the
26935 PC was in the list. */
26936 if (!had_to_push_lr)
26937 thumb_exit (asm_out_file, LR_REGNUM);
26938 }
26939 else
26940 {
26941 /* Pop everything but the return address. */
26942 if (live_regs_mask)
26943 thumb_pop (asm_out_file, live_regs_mask);
26944
26945 if (had_to_push_lr)
26946 {
26947 if (size > 12)
26948 {
26949 /* We have no free low regs, so save one. */
26950 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26951 LAST_ARG_REGNUM);
26952 }
26953
26954 /* Get the return address into a temporary register. */
26955 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26956
26957 if (size > 12)
26958 {
26959 /* Move the return address to lr. */
26960 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26961 LAST_ARG_REGNUM);
26962 /* Restore the low register. */
26963 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26964 IP_REGNUM);
26965 regno = LR_REGNUM;
26966 }
26967 else
26968 regno = LAST_ARG_REGNUM;
26969 }
26970 else
26971 regno = LR_REGNUM;
26972
26973 /* Remove the argument registers that were pushed onto the stack. */
26974 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26975 SP_REGNUM, SP_REGNUM,
26976 crtl->args.pretend_args_size);
26977
26978 thumb_exit (asm_out_file, regno);
26979 }
26980
26981 return "";
26982 }
26983
26984 /* Functions to save and restore machine-specific function data. */
26985 static struct machine_function *
26986 arm_init_machine_status (void)
26987 {
26988 struct machine_function *machine;
26989 machine = ggc_cleared_alloc<machine_function> ();
26990
26991 #if ARM_FT_UNKNOWN != 0
26992 machine->func_type = ARM_FT_UNKNOWN;
26993 #endif
26994 machine->static_chain_stack_bytes = -1;
26995 machine->pacspval_needed = 0;
26996 return machine;
26997 }
26998
26999 /* Return an RTX indicating where the return address to the
27000 calling function can be found. */
27001 rtx
27002 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27003 {
27004 if (count != 0)
27005 return NULL_RTX;
27006
27007 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27008 }
27009
27010 /* Do anything needed before RTL is emitted for each function. */
27011 void
27012 arm_init_expanders (void)
27013 {
27014 /* Arrange to initialize and mark the machine per-function status. */
27015 init_machine_status = arm_init_machine_status;
27016
27017 /* This is to stop the combine pass optimizing away the alignment
27018 adjustment of va_arg. */
27019 /* ??? It is claimed that this should not be necessary. */
27020 if (cfun)
27021 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27022 }
27023
27024 /* Return true if FUNC is to be compiled in a different instruction-set mode (ARM vs Thumb) from the one currently selected. */
27025
27026 bool
27027 arm_change_mode_p (tree func)
27028 {
27029 if (TREE_CODE (func) != FUNCTION_DECL)
27030 return false;
27031
27032 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27033
27034 if (!callee_tree)
27035 callee_tree = target_option_default_node;
27036
27037 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27038 int flags = callee_opts->x_target_flags;
27039
27040 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27041 }
27042
27043 /* Like arm_compute_initial_elimination_offset. Simpler because there
27044 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27045 to point at the base of the local variables after static stack
27046 space for a function has been allocated. */
27047
27048 HOST_WIDE_INT
27049 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27050 {
27051 arm_stack_offsets *offsets;
27052
27053 offsets = arm_get_frame_offsets ();
27054
27055 switch (from)
27056 {
27057 case ARG_POINTER_REGNUM:
27058 switch (to)
27059 {
27060 case STACK_POINTER_REGNUM:
27061 return offsets->outgoing_args - offsets->saved_args;
27062
27063 case FRAME_POINTER_REGNUM:
27064 return offsets->soft_frame - offsets->saved_args;
27065
27066 case ARM_HARD_FRAME_POINTER_REGNUM:
27067 return offsets->saved_regs - offsets->saved_args;
27068
27069 case THUMB_HARD_FRAME_POINTER_REGNUM:
27070 return offsets->locals_base - offsets->saved_args;
27071
27072 default:
27073 gcc_unreachable ();
27074 }
27075 break;
27076
27077 case FRAME_POINTER_REGNUM:
27078 switch (to)
27079 {
27080 case STACK_POINTER_REGNUM:
27081 return offsets->outgoing_args - offsets->soft_frame;
27082
27083 case ARM_HARD_FRAME_POINTER_REGNUM:
27084 return offsets->saved_regs - offsets->soft_frame;
27085
27086 case THUMB_HARD_FRAME_POINTER_REGNUM:
27087 return offsets->locals_base - offsets->soft_frame;
27088
27089 default:
27090 gcc_unreachable ();
27091 }
27092 break;
27093
27094 default:
27095 gcc_unreachable ();
27096 }
27097 }
27098
27099 /* Generate the function's prologue. */
27100
27101 void
27102 thumb1_expand_prologue (void)
27103 {
27104 rtx_insn *insn;
27105
27106 HOST_WIDE_INT amount;
27107 HOST_WIDE_INT size;
27108 arm_stack_offsets *offsets;
27109 unsigned long func_type;
27110 int regno;
27111 unsigned long live_regs_mask;
27112 unsigned long l_mask;
27113 unsigned high_regs_pushed = 0;
27114 bool lr_needs_saving;
27115
27116 func_type = arm_current_func_type ();
27117
27118 /* Naked functions don't have prologues. */
27119 if (IS_NAKED (func_type))
27120 {
27121 if (flag_stack_usage_info)
27122 current_function_static_stack_size = 0;
27123 return;
27124 }
27125
27126 if (IS_INTERRUPT (func_type))
27127 {
27128 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27129 return;
27130 }
27131
27132 if (is_called_in_ARM_mode (current_function_decl))
27133 emit_insn (gen_prologue_thumb1_interwork ());
27134
27135 offsets = arm_get_frame_offsets ();
27136 live_regs_mask = offsets->saved_regs_mask;
27137 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27138
27139 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27140 l_mask = live_regs_mask & 0x40ff;
27141 /* Then count how many other high registers will need to be pushed. */
27142 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27143
27144 if (crtl->args.pretend_args_size)
27145 {
27146 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27147
27148 if (cfun->machine->uses_anonymous_args)
27149 {
27150 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27151 unsigned long mask;
27152
27153 mask = 1ul << (LAST_ARG_REGNUM + 1);
27154 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27155
27156 insn = thumb1_emit_multi_reg_push (mask, 0);
27157 }
27158 else
27159 {
27160 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27161 stack_pointer_rtx, x));
27162 }
27163 RTX_FRAME_RELATED_P (insn) = 1;
27164 }
27165
27166 if (TARGET_BACKTRACE)
27167 {
27168 HOST_WIDE_INT offset = 0;
27169 unsigned work_register;
27170 rtx work_reg, x, arm_hfp_rtx;
27171
27172 /* We have been asked to create a stack backtrace structure.
27173 The code looks like this:
27174
27175 0 .align 2
27176 0 func:
27177 0 sub SP, #16 Reserve space for 4 registers.
27178 2 push {R7} Push low registers.
27179 4 add R7, SP, #20 Get the stack pointer before the push.
27180 6 str R7, [SP, #8] Store the stack pointer
27181 (before reserving the space).
27182 8 mov R7, PC Get hold of the start of this code + 12.
27183 10 str R7, [SP, #16] Store it.
27184 12 mov R7, FP Get hold of the current frame pointer.
27185 14 str R7, [SP, #4] Store it.
27186 16 mov R7, LR Get hold of the current return address.
27187 18 str R7, [SP, #12] Store it.
27188 20 add R7, SP, #16 Point at the start of the
27189 backtrace structure.
27190 22 mov FP, R7 Put this value into the frame pointer. */
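	 /* After this sequence the new frame pointer points at the saved PC
	    slot of the backtrace structure, with the saved LR at FP - 4, the
	    pre-push stack pointer at FP - 8 and the caller's frame pointer
	    at FP - 12.  */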
27191
27192 work_register = thumb_find_work_register (live_regs_mask);
27193 work_reg = gen_rtx_REG (SImode, work_register);
27194 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27195
27196 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27197 stack_pointer_rtx, GEN_INT (-16)));
27198 RTX_FRAME_RELATED_P (insn) = 1;
27199
27200 if (l_mask)
27201 {
27202 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27203 RTX_FRAME_RELATED_P (insn) = 1;
27204 lr_needs_saving = false;
27205
27206 offset = bit_count (l_mask) * UNITS_PER_WORD;
27207 }
27208
27209 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27210 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27211
27212 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27213 x = gen_frame_mem (SImode, x);
27214 emit_move_insn (x, work_reg);
27215
27216 /* Make sure that the instruction fetching the PC is in the right place
27217 to calculate "start of backtrace creation code + 12". */
27218 /* ??? The stores using the common WORK_REG ought to be enough to
27219 prevent the scheduler from doing anything weird. Failing that
27220 we could always move all of the following into an UNSPEC_VOLATILE. */
27221 if (l_mask)
27222 {
27223 x = gen_rtx_REG (SImode, PC_REGNUM);
27224 emit_move_insn (work_reg, x);
27225
27226 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27227 x = gen_frame_mem (SImode, x);
27228 emit_move_insn (x, work_reg);
27229
27230 emit_move_insn (work_reg, arm_hfp_rtx);
27231
27232 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27233 x = gen_frame_mem (SImode, x);
27234 emit_move_insn (x, work_reg);
27235 }
27236 else
27237 {
27238 emit_move_insn (work_reg, arm_hfp_rtx);
27239
27240 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27241 x = gen_frame_mem (SImode, x);
27242 emit_move_insn (x, work_reg);
27243
27244 x = gen_rtx_REG (SImode, PC_REGNUM);
27245 emit_move_insn (work_reg, x);
27246
27247 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27248 x = gen_frame_mem (SImode, x);
27249 emit_move_insn (x, work_reg);
27250 }
27251
27252 x = gen_rtx_REG (SImode, LR_REGNUM);
27253 emit_move_insn (work_reg, x);
27254
27255 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27256 x = gen_frame_mem (SImode, x);
27257 emit_move_insn (x, work_reg);
27258
27259 x = GEN_INT (offset + 12);
27260 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27261
27262 emit_move_insn (arm_hfp_rtx, work_reg);
27263 }
27264 /* Optimization: If we are not pushing any low registers but we are going
27265 to push some high registers then delay our first push. This will just
27266 be a push of LR and we can combine it with the push of the first high
27267 register. */
27268 else if ((l_mask & 0xff) != 0
27269 || (high_regs_pushed == 0 && lr_needs_saving))
27270 {
27271 unsigned long mask = l_mask;
27272 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27273 insn = thumb1_emit_multi_reg_push (mask, mask);
27274 RTX_FRAME_RELATED_P (insn) = 1;
27275 lr_needs_saving = false;
27276 }
27277
27278 if (high_regs_pushed)
27279 {
27280 unsigned pushable_regs;
27281 unsigned next_hi_reg;
27282 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27283 : crtl->args.info.nregs;
27284 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27285
27286 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27287 if (live_regs_mask & (1 << next_hi_reg))
27288 break;
27289
27290 /* Here we need to mask out registers used for passing arguments
27291 	    even if they could be pushed.  This avoids using them to stash
27292 	    the high registers, since such stashing could clobber argument
27293 	    values that are still live.  */
27294 pushable_regs = l_mask & (~arg_regs_mask);
27295 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27296
27297 /* Normally, LR can be used as a scratch register once it has been
27298 saved; but if the function examines its own return address then
27299 the value is still live and we need to avoid using it. */
27300 bool return_addr_live
27301 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27302 LR_REGNUM);
27303
27304 if (lr_needs_saving || return_addr_live)
27305 pushable_regs &= ~(1 << LR_REGNUM);
27306
27307 if (pushable_regs == 0)
27308 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27309
27310 while (high_regs_pushed > 0)
27311 {
27312 unsigned long real_regs_mask = 0;
27313 unsigned long push_mask = 0;
27314
27315 for (regno = LR_REGNUM; regno >= 0; regno --)
27316 {
27317 if (pushable_regs & (1 << regno))
27318 {
27319 emit_move_insn (gen_rtx_REG (SImode, regno),
27320 gen_rtx_REG (SImode, next_hi_reg));
27321
27322 high_regs_pushed --;
27323 real_regs_mask |= (1 << next_hi_reg);
27324 push_mask |= (1 << regno);
27325
27326 if (high_regs_pushed)
27327 {
27328 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27329 next_hi_reg --)
27330 if (live_regs_mask & (1 << next_hi_reg))
27331 break;
27332 }
27333 else
27334 break;
27335 }
27336 }
27337
27338 /* If we had to find a work register and we have not yet
27339 saved the LR then add it to the list of regs to push. */
27340 if (lr_needs_saving)
27341 {
27342 push_mask |= 1 << LR_REGNUM;
27343 real_regs_mask |= 1 << LR_REGNUM;
27344 lr_needs_saving = false;
27345 /* If the return address is not live at this point, we
27346 can add LR to the list of registers that we can use
27347 for pushes. */
27348 if (!return_addr_live)
27349 pushable_regs |= 1 << LR_REGNUM;
27350 }
27351
27352 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27353 RTX_FRAME_RELATED_P (insn) = 1;
27354 }
27355 }
27356
27357 /* Load the pic register before setting the frame pointer,
27358 so we can use r7 as a temporary work register. */
27359 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27360 arm_load_pic_register (live_regs_mask, NULL_RTX);
27361
27362 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27363 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27364 stack_pointer_rtx);
27365
27366 size = offsets->outgoing_args - offsets->saved_args;
27367 if (flag_stack_usage_info)
27368 current_function_static_stack_size = size;
27369
27370 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27371 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27372 || flag_stack_clash_protection)
27373 && size)
27374 sorry ("%<-fstack-check=specific%> for Thumb-1");
27375
27376 amount = offsets->outgoing_args - offsets->saved_regs;
27377 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27378 if (amount)
27379 {
27380 if (amount < 512)
27381 {
27382 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27383 GEN_INT (- amount)));
27384 RTX_FRAME_RELATED_P (insn) = 1;
27385 }
27386 else
27387 {
27388 rtx reg, dwarf;
27389
27390 /* The stack decrement is too big for an immediate value in a single
27391 insn. In theory we could issue multiple subtracts, but after
27392 three of them it becomes more space efficient to place the full
27393 value in the constant pool and load into a register. (Also the
27394 ARM debugger really likes to see only one stack decrement per
27395 function). So instead we look for a scratch register into which
27396 we can load the decrement, and then we subtract this from the
27397 stack pointer. Unfortunately on the thumb the only available
27398 scratch registers are the argument registers, and we cannot use
27399 these as they may hold arguments to the function. Instead we
27400 attempt to locate a call preserved register which is used by this
27401 function. If we can find one, then we know that it will have
27402 been pushed at the start of the prologue and so we can corrupt
27403 it now. */
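	  /* For instance, if r4 is found in LIVE_REGS_MASK and AMOUNT is
	     1024, the code below ends up emitting something like
		ldr	r4, .Lpool	@ .Lpool: .word -1024 (hypothetical label)
		add	sp, sp, r4
	     with the exact register depending on which call-saved low
	     register is found.  */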
27404 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27405 if (live_regs_mask & (1 << regno))
27406 break;
27407
27408 gcc_assert(regno <= LAST_LO_REGNUM);
27409
27410 reg = gen_rtx_REG (SImode, regno);
27411
27412 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27413
27414 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27415 stack_pointer_rtx, reg));
27416
27417 dwarf = gen_rtx_SET (stack_pointer_rtx,
27418 plus_constant (Pmode, stack_pointer_rtx,
27419 -amount));
27420 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27421 RTX_FRAME_RELATED_P (insn) = 1;
27422 }
27423 }
27424
27425 if (frame_pointer_needed)
27426 thumb_set_frame_pointer (offsets);
27427
27428 /* If we are profiling, make sure no instructions are scheduled before
27429 the call to mcount. Similarly if the user has requested no
27430 scheduling in the prolog. Similarly if we want non-call exceptions
27431 using the EABI unwinder, to prevent faulting instructions from being
27432 swapped with a stack adjustment. */
27433 if (crtl->profile || !TARGET_SCHED_PROLOG
27434 || (arm_except_unwind_info (&global_options) == UI_TARGET
27435 && cfun->can_throw_non_call_exceptions))
27436 emit_insn (gen_blockage ());
27437
27438 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27439 if (live_regs_mask & 0xff)
27440 cfun->machine->lr_save_eliminated = 0;
27441 }
27442
27443 /* Clear caller saved registers not used to pass return values and leaked
27444 condition flags before exiting a cmse_nonsecure_entry function. */
27445
27446 void
27447 cmse_nonsecure_entry_clear_before_return (void)
27448 {
27449 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27450 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27451 uint32_t padding_bits_to_clear = 0;
27452 auto_sbitmap to_clear_bitmap (maxregno + 1);
27453 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27454 tree result_type;
27455
27456 bitmap_clear (to_clear_bitmap);
27457 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27458 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27459
27460 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27461 registers. */
27462 if (clear_vfpregs)
27463 {
27464 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27465
27466 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27467
27468 if (!TARGET_HAVE_FPCXT_CMSE)
27469 {
27470 /* Make sure we don't clear the two scratch registers used to clear
27471 the relevant FPSCR bits in output_return_instruction. */
27472 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27473 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27474 emit_use (gen_rtx_REG (SImode, 4));
27475 bitmap_clear_bit (to_clear_bitmap, 4);
27476 }
27477 }
27478
27479 /* If the user has defined registers to be caller saved, these are no longer
27480 restored by the function before returning and must thus be cleared for
27481 security purposes. */
27482 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27483 {
27484 /* We do not touch registers that can be used to pass arguments as per
27485 the AAPCS, since these should never be made callee-saved by user
27486 options. */
27487 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27488 continue;
27489 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27490 continue;
27491 if (!callee_saved_reg_p (regno)
27492 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27493 || TARGET_HARD_FLOAT))
27494 bitmap_set_bit (to_clear_bitmap, regno);
27495 }
27496
27497 /* Make sure we do not clear the registers used to return the result in. */
27498 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27499 if (!VOID_TYPE_P (result_type))
27500 {
27501 uint64_t to_clear_return_mask;
27502 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27503
27504 /* No need to check that we return in registers, because we don't
27505 support returning on stack yet. */
27506 gcc_assert (REG_P (result_rtl));
27507 to_clear_return_mask
27508 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27509 &padding_bits_to_clear);
27510 if (to_clear_return_mask)
27511 {
27512 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27513 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27514 {
27515 if (to_clear_return_mask & (1ULL << regno))
27516 bitmap_clear_bit (to_clear_bitmap, regno);
27517 }
27518 }
27519 }
27520
27521 if (padding_bits_to_clear != 0)
27522 {
27523 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27524 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27525
27526 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27527 returning a composite type, which only uses r0. Let's make sure that
27528 	 r1-r3 are cleared too. */
27529 bitmap_clear (to_clear_arg_regs_bitmap);
27530 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27531 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27532 }
27533
27534 /* Clear full registers that leak before returning. */
27535 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27536 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27537 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27538 clearing_reg);
27539 }
27540
27541 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27542    single POP instruction can be generated. LR should be replaced by PC.
27543    All the checks required are already done by USE_RETURN_INSN (). Hence,
27544    all we really need to check here is whether a single register or
27545    multiple registers are to be restored on return. */
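/* Illustrative note: in the single-register case below the saved return
   address is popped straight into the PC via a post-increment load from
   the stack, combined with the return in one PARALLEL pattern.  */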
27546 void
27547 thumb2_expand_return (bool simple_return)
27548 {
27549 int i, num_regs;
27550 unsigned long saved_regs_mask;
27551 arm_stack_offsets *offsets;
27552
27553 offsets = arm_get_frame_offsets ();
27554 saved_regs_mask = offsets->saved_regs_mask;
27555
27556 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27557 if (saved_regs_mask & (1 << i))
27558 num_regs++;
27559
27560 if (!simple_return && saved_regs_mask)
27561 {
27562 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27563 functions or adapt code to handle according to ACLE. This path should
27564 not be reachable for cmse_nonsecure_entry functions though we prefer
27565 to assert it for now to ensure that future code changes do not silently
27566 change this behavior. */
27567 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27568 if (arm_current_function_pac_enabled_p ())
27569 {
27570 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27571 arm_emit_multi_reg_pop (saved_regs_mask);
27572 emit_insn (gen_aut_nop ());
27573 emit_jump_insn (simple_return_rtx);
27574 }
27575 else if (num_regs == 1)
27576 {
27577 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27578 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27579 rtx addr = gen_rtx_MEM (SImode,
27580 gen_rtx_POST_INC (SImode,
27581 stack_pointer_rtx));
27582 set_mem_alias_set (addr, get_frame_alias_set ());
27583 XVECEXP (par, 0, 0) = ret_rtx;
27584 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27585 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27586 emit_jump_insn (par);
27587 }
27588 else
27589 {
27590 saved_regs_mask &= ~ (1 << LR_REGNUM);
27591 saved_regs_mask |= (1 << PC_REGNUM);
27592 arm_emit_multi_reg_pop (saved_regs_mask);
27593 }
27594 }
27595 else
27596 {
27597 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27598 cmse_nonsecure_entry_clear_before_return ();
27599 emit_jump_insn (simple_return_rtx);
27600 }
27601 }
27602
27603 void
27604 thumb1_expand_epilogue (void)
27605 {
27606 HOST_WIDE_INT amount;
27607 arm_stack_offsets *offsets;
27608 int regno;
27609
27610   /* Naked functions don't have epilogues. */
27611 if (IS_NAKED (arm_current_func_type ()))
27612 return;
27613
27614 offsets = arm_get_frame_offsets ();
27615 amount = offsets->outgoing_args - offsets->saved_regs;
27616
27617 if (frame_pointer_needed)
27618 {
27619 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27620 amount = offsets->locals_base - offsets->saved_regs;
27621 }
27622 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27623
27624 gcc_assert (amount >= 0);
27625 if (amount)
27626 {
27627 emit_insn (gen_blockage ());
27628
27629 if (amount < 512)
27630 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27631 GEN_INT (amount)));
27632 else
27633 {
27634 /* r3 is always free in the epilogue. */
27635 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27636
27637 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27638 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27639 }
27640 }
27641
27642 /* Emit a USE (stack_pointer_rtx), so that
27643 the stack adjustment will not be deleted. */
27644 emit_insn (gen_force_register_use (stack_pointer_rtx));
27645
27646 if (crtl->profile || !TARGET_SCHED_PROLOG)
27647 emit_insn (gen_blockage ());
27648
27649 /* Emit a clobber for each insn that will be restored in the epilogue,
27650 so that flow2 will get register lifetimes correct. */
27651 for (regno = 0; regno < 13; regno++)
27652 if (reg_needs_saving_p (regno))
27653 emit_clobber (gen_rtx_REG (SImode, regno));
27654
27655 if (! df_regs_ever_live_p (LR_REGNUM))
27656 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27657
27658 /* Clear all caller-saved regs that are not used to return. */
27659 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27660 cmse_nonsecure_entry_clear_before_return ();
27661 }
27662
27663 /* Epilogue code for APCS frame. */
27664 static void
27665 arm_expand_epilogue_apcs_frame (bool really_return)
27666 {
27667 unsigned long func_type;
27668 unsigned long saved_regs_mask;
27669 int num_regs = 0;
27670 int i;
27671 int floats_from_frame = 0;
27672 arm_stack_offsets *offsets;
27673
27674 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27675 func_type = arm_current_func_type ();
27676
27677 /* Get frame offsets for ARM. */
27678 offsets = arm_get_frame_offsets ();
27679 saved_regs_mask = offsets->saved_regs_mask;
27680
27681 /* Find the offset of the floating-point save area in the frame. */
27682 floats_from_frame
27683 = (offsets->saved_args
27684 + arm_compute_static_chain_stack_bytes ()
27685 - offsets->frame);
27686
27687 /* Compute how many core registers saved and how far away the floats are. */
27688 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27689 if (saved_regs_mask & (1 << i))
27690 {
27691 num_regs++;
27692 floats_from_frame += 4;
27693 }
27694
27695 if (TARGET_VFP_BASE)
27696 {
27697 int start_reg;
27698 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27699
27700 /* The offset is from IP_REGNUM. */
27701 int saved_size = arm_get_vfp_saved_size ();
27702 if (saved_size > 0)
27703 {
27704 rtx_insn *insn;
27705 floats_from_frame += saved_size;
27706 insn = emit_insn (gen_addsi3 (ip_rtx,
27707 hard_frame_pointer_rtx,
27708 GEN_INT (-floats_from_frame)));
27709 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27710 ip_rtx, hard_frame_pointer_rtx);
27711 }
27712
27713 /* Generate VFP register multi-pop. */
27714 start_reg = FIRST_VFP_REGNUM;
27715
27716 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27717 /* Look for a case where a reg does not need restoring. */
27718 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27719 {
27720 if (start_reg != i)
27721 arm_emit_vfp_multi_reg_pop (start_reg,
27722 (i - start_reg) / 2,
27723 gen_rtx_REG (SImode,
27724 IP_REGNUM));
27725 start_reg = i + 2;
27726 }
27727
27728 /* Restore the remaining regs that we have discovered (or possibly
27729 even all of them, if the conditional in the for loop never
27730 fired). */
27731 if (start_reg != i)
27732 arm_emit_vfp_multi_reg_pop (start_reg,
27733 (i - start_reg) / 2,
27734 gen_rtx_REG (SImode, IP_REGNUM));
27735 }
27736
27737 if (TARGET_IWMMXT)
27738 {
27739 /* The frame pointer is guaranteed to be non-double-word aligned, as
27740 it is set to double-word-aligned old_stack_pointer - 4. */
27741 rtx_insn *insn;
27742 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27743
27744 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27745 if (reg_needs_saving_p (i))
27746 {
27747 rtx addr = gen_frame_mem (V2SImode,
27748 plus_constant (Pmode, hard_frame_pointer_rtx,
27749 - lrm_count * 4));
27750 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27751 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27752 gen_rtx_REG (V2SImode, i),
27753 NULL_RTX);
27754 lrm_count += 2;
27755 }
27756 }
27757
27758   /* saved_regs_mask should contain IP, which holds the old stack pointer
27759      from the time the activation record was created. Since SP and IP are
27760      adjacent registers, we can pop that value directly into SP. */
27761 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27762 saved_regs_mask &= ~(1 << IP_REGNUM);
27763 saved_regs_mask |= (1 << SP_REGNUM);
27764
27765 /* There are two registers left in saved_regs_mask - LR and PC. We
27766 only need to restore LR (the return address), but to
27767 save time we can load it directly into PC, unless we need a
27768 special function exit sequence, or we are not really returning. */
27769 if (really_return
27770 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27771 && !crtl->calls_eh_return)
27772 /* Delete LR from the register mask, so that LR on
27773 the stack is loaded into the PC in the register mask. */
27774 saved_regs_mask &= ~(1 << LR_REGNUM);
27775 else
27776 saved_regs_mask &= ~(1 << PC_REGNUM);
27777
27778 num_regs = bit_count (saved_regs_mask);
27779 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27780 {
27781 rtx_insn *insn;
27782 emit_insn (gen_blockage ());
27783 /* Unwind the stack to just below the saved registers. */
27784 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27785 hard_frame_pointer_rtx,
27786 GEN_INT (- 4 * num_regs)));
27787
27788 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27789 stack_pointer_rtx, hard_frame_pointer_rtx);
27790 }
27791
27792 arm_emit_multi_reg_pop (saved_regs_mask);
27793
27794 if (IS_INTERRUPT (func_type))
27795 {
27796 /* Interrupt handlers will have pushed the
27797 IP onto the stack, so restore it now. */
27798 rtx_insn *insn;
27799 rtx addr = gen_rtx_MEM (SImode,
27800 gen_rtx_POST_INC (SImode,
27801 stack_pointer_rtx));
27802 set_mem_alias_set (addr, get_frame_alias_set ());
27803 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27804 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27805 gen_rtx_REG (SImode, IP_REGNUM),
27806 NULL_RTX);
27807 }
27808
27809 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27810 return;
27811
27812 if (crtl->calls_eh_return)
27813 emit_insn (gen_addsi3 (stack_pointer_rtx,
27814 stack_pointer_rtx,
27815 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27816
27817 if (IS_STACKALIGN (func_type))
27818 /* Restore the original stack pointer. Before prologue, the stack was
27819 realigned and the original stack pointer saved in r0. For details,
27820 see comment in arm_expand_prologue. */
27821 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27822
27823 emit_jump_insn (simple_return_rtx);
27824 }
27825
27826 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27827 function is not a sibcall. */
27828 void
27829 arm_expand_epilogue (bool really_return)
27830 {
27831 unsigned long func_type;
27832 unsigned long saved_regs_mask;
27833 int num_regs = 0;
27834 int i;
27835 int amount;
27836 arm_stack_offsets *offsets;
27837
27838 func_type = arm_current_func_type ();
27839
27840   /* Naked functions don't have an epilogue. Hence, generate the return pattern
27841      and let output_return_instruction take care of any instruction emission. */
27842 if (IS_NAKED (func_type)
27843 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27844 {
27845 if (really_return)
27846 emit_jump_insn (simple_return_rtx);
27847 return;
27848 }
27849
27850 /* If we are throwing an exception, then we really must be doing a
27851 return, so we can't tail-call. */
27852 gcc_assert (!crtl->calls_eh_return || really_return);
27853
27854 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27855 {
27856 arm_expand_epilogue_apcs_frame (really_return);
27857 return;
27858 }
27859
27860 /* Get frame offsets for ARM. */
27861 offsets = arm_get_frame_offsets ();
27862 saved_regs_mask = offsets->saved_regs_mask;
27863 num_regs = bit_count (saved_regs_mask);
27864
27865 if (frame_pointer_needed)
27866 {
27867 rtx_insn *insn;
27868 /* Restore stack pointer if necessary. */
27869 if (TARGET_ARM)
27870 {
27871 /* In ARM mode, frame pointer points to first saved register.
27872 Restore stack pointer to last saved register. */
27873 amount = offsets->frame - offsets->saved_regs;
27874
27875 /* Force out any pending memory operations that reference stacked data
27876 before stack de-allocation occurs. */
27877 emit_insn (gen_blockage ());
27878 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27879 hard_frame_pointer_rtx,
27880 GEN_INT (amount)));
27881 arm_add_cfa_adjust_cfa_note (insn, amount,
27882 stack_pointer_rtx,
27883 hard_frame_pointer_rtx);
27884
27885 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27886 deleted. */
27887 emit_insn (gen_force_register_use (stack_pointer_rtx));
27888 }
27889 else
27890 {
27891 /* In Thumb-2 mode, the frame pointer points to the last saved
27892 register. */
27893 amount = offsets->locals_base - offsets->saved_regs;
27894 if (amount)
27895 {
27896 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27897 hard_frame_pointer_rtx,
27898 GEN_INT (amount)));
27899 arm_add_cfa_adjust_cfa_note (insn, amount,
27900 hard_frame_pointer_rtx,
27901 hard_frame_pointer_rtx);
27902 }
27903
27904 /* Force out any pending memory operations that reference stacked data
27905 before stack de-allocation occurs. */
27906 emit_insn (gen_blockage ());
27907 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27908 hard_frame_pointer_rtx));
27909 arm_add_cfa_adjust_cfa_note (insn, 0,
27910 stack_pointer_rtx,
27911 hard_frame_pointer_rtx);
27912 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27913 deleted. */
27914 emit_insn (gen_force_register_use (stack_pointer_rtx));
27915 }
27916 }
27917 else
27918 {
27919 /* Pop off outgoing args and local frame to adjust stack pointer to
27920 last saved register. */
27921 amount = offsets->outgoing_args - offsets->saved_regs;
27922 if (amount)
27923 {
27924 rtx_insn *tmp;
27925 /* Force out any pending memory operations that reference stacked data
27926 before stack de-allocation occurs. */
27927 emit_insn (gen_blockage ());
27928 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27929 stack_pointer_rtx,
27930 GEN_INT (amount)));
27931 arm_add_cfa_adjust_cfa_note (tmp, amount,
27932 stack_pointer_rtx, stack_pointer_rtx);
27933 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27934 not deleted. */
27935 emit_insn (gen_force_register_use (stack_pointer_rtx));
27936 }
27937 }
27938
27939 if (TARGET_VFP_BASE)
27940 {
27941 /* Generate VFP register multi-pop. */
27942 int end_reg = LAST_VFP_REGNUM + 1;
27943
27944 /* Scan the registers in reverse order. We need to match
27945 any groupings made in the prologue and generate matching
27946 vldm operations. The need to match groups is because,
27947 unlike pop, vldm can only do consecutive regs. */
27948 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27949 /* Look for a case where a reg does not need restoring. */
27950 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27951 {
27952 /* Restore the regs discovered so far (from reg+2 to
27953 end_reg). */
27954 if (end_reg > i + 2)
27955 arm_emit_vfp_multi_reg_pop (i + 2,
27956 (end_reg - (i + 2)) / 2,
27957 stack_pointer_rtx);
27958 end_reg = i;
27959 }
27960
27961 /* Restore the remaining regs that we have discovered (or possibly
27962 even all of them, if the conditional in the for loop never
27963 fired). */
27964 if (end_reg > i + 2)
27965 arm_emit_vfp_multi_reg_pop (i + 2,
27966 (end_reg - (i + 2)) / 2,
27967 stack_pointer_rtx);
27968 }
27969
27970 if (TARGET_IWMMXT)
27971 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27972 if (reg_needs_saving_p (i))
27973 {
27974 rtx_insn *insn;
27975 rtx addr = gen_rtx_MEM (V2SImode,
27976 gen_rtx_POST_INC (SImode,
27977 stack_pointer_rtx));
27978 set_mem_alias_set (addr, get_frame_alias_set ());
27979 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27980 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27981 gen_rtx_REG (V2SImode, i),
27982 NULL_RTX);
27983 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27984 stack_pointer_rtx, stack_pointer_rtx);
27985 }
27986
27987 if (saved_regs_mask)
27988 {
27989 rtx insn;
27990 bool return_in_pc = false;
27991
27992 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27993 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27994 && !IS_CMSE_ENTRY (func_type)
27995 && !IS_STACKALIGN (func_type)
27996 && really_return
27997 && crtl->args.pretend_args_size == 0
27998 && saved_regs_mask & (1 << LR_REGNUM)
27999 && !crtl->calls_eh_return
28000 && !arm_current_function_pac_enabled_p ())
28001 {
28002 saved_regs_mask &= ~(1 << LR_REGNUM);
28003 saved_regs_mask |= (1 << PC_REGNUM);
28004 return_in_pc = true;
28005 }
28006
28007 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28008 {
28009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28010 if (saved_regs_mask & (1 << i))
28011 {
28012 rtx addr = gen_rtx_MEM (SImode,
28013 gen_rtx_POST_INC (SImode,
28014 stack_pointer_rtx));
28015 set_mem_alias_set (addr, get_frame_alias_set ());
28016
28017 if (i == PC_REGNUM)
28018 {
28019 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28020 XVECEXP (insn, 0, 0) = ret_rtx;
28021 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28022 addr);
28023 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28024 insn = emit_jump_insn (insn);
28025 }
28026 else
28027 {
28028 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28029 addr));
28030 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28031 gen_rtx_REG (SImode, i),
28032 NULL_RTX);
28033 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28034 stack_pointer_rtx,
28035 stack_pointer_rtx);
28036 }
28037 }
28038 }
28039 else
28040 {
28041 if (TARGET_LDRD
28042 && current_tune->prefer_ldrd_strd
28043 && !optimize_function_for_size_p (cfun))
28044 {
28045 if (TARGET_THUMB2)
28046 thumb2_emit_ldrd_pop (saved_regs_mask);
28047 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28048 arm_emit_ldrd_pop (saved_regs_mask);
28049 else
28050 arm_emit_multi_reg_pop (saved_regs_mask);
28051 }
28052 else
28053 arm_emit_multi_reg_pop (saved_regs_mask);
28054 }
28055
28056 if (return_in_pc)
28057 return;
28058 }
28059
28060 amount
28061 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28062 if (amount)
28063 {
28064 int i, j;
28065 rtx dwarf = NULL_RTX;
28066 rtx_insn *tmp =
28067 emit_insn (gen_addsi3 (stack_pointer_rtx,
28068 stack_pointer_rtx,
28069 GEN_INT (amount)));
28070
28071 RTX_FRAME_RELATED_P (tmp) = 1;
28072
28073 if (cfun->machine->uses_anonymous_args)
28074 {
28075 	  /* Restore pretend args. See arm_expand_prologue for how pretend
28076 	     args are saved on the stack. */
28077 int num_regs = crtl->args.pretend_args_size / 4;
28078 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28079 for (j = 0, i = 0; j < num_regs; i++)
28080 if (saved_regs_mask & (1 << i))
28081 {
28082 rtx reg = gen_rtx_REG (SImode, i);
28083 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28084 j++;
28085 }
28086 REG_NOTES (tmp) = dwarf;
28087 }
28088 arm_add_cfa_adjust_cfa_note (tmp, amount,
28089 stack_pointer_rtx, stack_pointer_rtx);
28090 }
28091
28092 if (IS_CMSE_ENTRY (func_type))
28093 {
28094 /* CMSE_ENTRY always returns. */
28095 gcc_assert (really_return);
28096 /* Clear all caller-saved regs that are not used to return. */
28097 cmse_nonsecure_entry_clear_before_return ();
28098
28099 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28100 VLDR. */
28101 if (TARGET_HAVE_FPCXT_CMSE)
28102 {
28103 rtx_insn *insn;
28104
28105 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28106 GEN_INT (FPCXTNS_ENUM)));
28107 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28108 plus_constant (Pmode, stack_pointer_rtx, 4));
28109 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28110 RTX_FRAME_RELATED_P (insn) = 1;
28111 }
28112 }
28113
28114 if (arm_current_function_pac_enabled_p ())
28115 emit_insn (gen_aut_nop ());
28116
28117 if (!really_return)
28118 return;
28119
28120 if (crtl->calls_eh_return)
28121 emit_insn (gen_addsi3 (stack_pointer_rtx,
28122 stack_pointer_rtx,
28123 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28124
28125 if (IS_STACKALIGN (func_type))
28126 /* Restore the original stack pointer. Before prologue, the stack was
28127 realigned and the original stack pointer saved in r0. For details,
28128 see comment in arm_expand_prologue. */
28129 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28130
28131 emit_jump_insn (simple_return_rtx);
28132 }
28133
28134 /* Implementation of insn prologue_thumb1_interwork. This is the first
28135 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28136
28137 const char *
28138 thumb1_output_interwork (void)
28139 {
28140 const char * name;
28141 FILE *f = asm_out_file;
28142
28143 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28144 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28145 == SYMBOL_REF);
28146 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28147
28148 /* Generate code sequence to switch us into Thumb mode. */
28149 /* The .code 32 directive has already been emitted by
28150 ASM_DECLARE_FUNCTION_NAME. */
28151 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28152 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28153
28154 /* Generate a label, so that the debugger will notice the
28155 change in instruction sets. This label is also used by
28156 the assembler to bypass the ARM code when this function
28157 is called from a Thumb encoded function elsewhere in the
28158 same file. Hence the definition of STUB_NAME here must
28159 agree with the definition in gas/config/tc-arm.c. */
28160
28161 #define STUB_NAME ".real_start_of"
28162
28163 fprintf (f, "\t.code\t16\n");
28164 #ifdef ARM_PE
28165 if (arm_dllexport_name_p (name))
28166 name = arm_strip_name_encoding (name);
28167 #endif
28168 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28169 fprintf (f, "\t.thumb_func\n");
28170 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28171
28172 return "";
28173 }
28174
28175 /* Handle the case of a double word load into a low register from
28176 a computed memory address. The computed address may involve a
28177 register which is overwritten by the load. */
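/* For example, when loading a double word at [r0] into r0/r1 (so the base
   register is also the low destination register), the REG case below emits
	ldr	r1, [r0, #4]
	ldr	r0, [r0]
   loading the high word first so the base is not clobbered too early.  */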
28178 const char *
28179 thumb_load_double_from_address (rtx *operands)
28180 {
28181 rtx addr;
28182 rtx base;
28183 rtx offset;
28184 rtx arg1;
28185 rtx arg2;
28186
28187 gcc_assert (REG_P (operands[0]));
28188 gcc_assert (MEM_P (operands[1]));
28189
28190 /* Get the memory address. */
28191 addr = XEXP (operands[1], 0);
28192
28193 /* Work out how the memory address is computed. */
28194 switch (GET_CODE (addr))
28195 {
28196 case REG:
28197 operands[2] = adjust_address (operands[1], SImode, 4);
28198
28199 if (REGNO (operands[0]) == REGNO (addr))
28200 {
28201 output_asm_insn ("ldr\t%H0, %2", operands);
28202 output_asm_insn ("ldr\t%0, %1", operands);
28203 }
28204 else
28205 {
28206 output_asm_insn ("ldr\t%0, %1", operands);
28207 output_asm_insn ("ldr\t%H0, %2", operands);
28208 }
28209 break;
28210
28211 case CONST:
28212 /* Compute <address> + 4 for the high order load. */
28213 operands[2] = adjust_address (operands[1], SImode, 4);
28214
28215 output_asm_insn ("ldr\t%0, %1", operands);
28216 output_asm_insn ("ldr\t%H0, %2", operands);
28217 break;
28218
28219 case PLUS:
28220 arg1 = XEXP (addr, 0);
28221 arg2 = XEXP (addr, 1);
28222
28223 if (CONSTANT_P (arg1))
28224 base = arg2, offset = arg1;
28225 else
28226 base = arg1, offset = arg2;
28227
28228 gcc_assert (REG_P (base));
28229
28230 /* Catch the case of <address> = <reg> + <reg> */
28231 if (REG_P (offset))
28232 {
28233 int reg_offset = REGNO (offset);
28234 int reg_base = REGNO (base);
28235 int reg_dest = REGNO (operands[0]);
28236
28237 /* Add the base and offset registers together into the
28238 higher destination register. */
28239 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28240 reg_dest + 1, reg_base, reg_offset);
28241
28242 /* Load the lower destination register from the address in
28243 the higher destination register. */
28244 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28245 reg_dest, reg_dest + 1);
28246
28247 /* Load the higher destination register from its own address
28248 plus 4. */
28249 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28250 reg_dest + 1, reg_dest + 1);
28251 }
28252 else
28253 {
28254 /* Compute <address> + 4 for the high order load. */
28255 operands[2] = adjust_address (operands[1], SImode, 4);
28256
28257 /* If the computed address is held in the low order register
28258 then load the high order register first, otherwise always
28259 load the low order register first. */
28260 if (REGNO (operands[0]) == REGNO (base))
28261 {
28262 output_asm_insn ("ldr\t%H0, %2", operands);
28263 output_asm_insn ("ldr\t%0, %1", operands);
28264 }
28265 else
28266 {
28267 output_asm_insn ("ldr\t%0, %1", operands);
28268 output_asm_insn ("ldr\t%H0, %2", operands);
28269 }
28270 }
28271 break;
28272
28273 case LABEL_REF:
28274 /* With no registers to worry about we can just load the value
28275 directly. */
28276 operands[2] = adjust_address (operands[1], SImode, 4);
28277
28278 output_asm_insn ("ldr\t%H0, %2", operands);
28279 output_asm_insn ("ldr\t%0, %1", operands);
28280 break;
28281
28282 default:
28283 gcc_unreachable ();
28284 }
28285
28286 return "";
28287 }
28288
28289 const char *
28290 thumb_output_move_mem_multiple (int n, rtx *operands)
28291 {
28292 switch (n)
28293 {
28294 case 2:
28295 if (REGNO (operands[4]) > REGNO (operands[5]))
28296 std::swap (operands[4], operands[5]);
28297
28298 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28299 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28300 break;
28301
28302 case 3:
28303 if (REGNO (operands[4]) > REGNO (operands[5]))
28304 std::swap (operands[4], operands[5]);
28305 if (REGNO (operands[5]) > REGNO (operands[6]))
28306 std::swap (operands[5], operands[6]);
28307 if (REGNO (operands[4]) > REGNO (operands[5]))
28308 std::swap (operands[4], operands[5]);
28309
28310 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28311 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28312 break;
28313
28314 default:
28315 gcc_unreachable ();
28316 }
28317
28318 return "";
28319 }
28320
28321 /* Output a call-via instruction for thumb state. */
28322 const char *
28323 thumb_call_via_reg (rtx reg)
28324 {
28325 int regno = REGNO (reg);
28326 rtx *labelp;
28327
28328 gcc_assert (regno < LR_REGNUM);
28329
28330 /* If we are in the normal text section we can use a single instance
28331 per compilation unit. If we are doing function sections, then we need
28332 an entry per section, since we can't rely on reachability. */
28333 if (in_section == text_section)
28334 {
28335 thumb_call_reg_needed = 1;
28336
28337 if (thumb_call_via_label[regno] == NULL)
28338 thumb_call_via_label[regno] = gen_label_rtx ();
28339 labelp = thumb_call_via_label + regno;
28340 }
28341 else
28342 {
28343 if (cfun->machine->call_via[regno] == NULL)
28344 cfun->machine->call_via[regno] = gen_label_rtx ();
28345 labelp = cfun->machine->call_via + regno;
28346 }
28347
28348 output_asm_insn ("bl\t%a0", labelp);
28349 return "";
28350 }
28351
28352 /* Routines for generating rtl. */
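/* Expand a cpymemqi of LEN bytes: copy 12-byte blocks with ldmia/stmia
   while possible, then at most one 8-byte block, one word, one halfword
   and one byte.  E.g. LEN == 27 becomes two 12-byte block moves followed
   by a halfword and a byte copy.  */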
28353 void
28354 thumb_expand_cpymemqi (rtx *operands)
28355 {
28356 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28357 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28358 HOST_WIDE_INT len = INTVAL (operands[2]);
28359 HOST_WIDE_INT offset = 0;
28360
28361 while (len >= 12)
28362 {
28363 emit_insn (gen_cpymem12b (out, in, out, in));
28364 len -= 12;
28365 }
28366
28367 if (len >= 8)
28368 {
28369 emit_insn (gen_cpymem8b (out, in, out, in));
28370 len -= 8;
28371 }
28372
28373 if (len >= 4)
28374 {
28375 rtx reg = gen_reg_rtx (SImode);
28376 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28377 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28378 len -= 4;
28379 offset += 4;
28380 }
28381
28382 if (len >= 2)
28383 {
28384 rtx reg = gen_reg_rtx (HImode);
28385 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28386 plus_constant (Pmode, in,
28387 offset))));
28388 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28389 offset)),
28390 reg));
28391 len -= 2;
28392 offset += 2;
28393 }
28394
28395 if (len)
28396 {
28397 rtx reg = gen_reg_rtx (QImode);
28398 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28399 plus_constant (Pmode, in,
28400 offset))));
28401 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28402 offset)),
28403 reg));
28404 }
28405 }
28406
28407 void
28408 thumb_reload_out_hi (rtx *operands)
28409 {
28410 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28411 }
28412
28413 /* Return the length of a function name prefix
28414 that starts with the character 'c'. */
28415 static int
28416 arm_get_strip_length (int c)
28417 {
28418 switch (c)
28419 {
28420 ARM_NAME_ENCODING_LENGTHS
28421 default: return 0;
28422 }
28423 }
28424
28425 /* Return a pointer to a function's name with any
28426 and all prefix encodings stripped from it. */
28427 const char *
28428 arm_strip_name_encoding (const char *name)
28429 {
28430 int skip;
28431
28432 while ((skip = arm_get_strip_length (* name)))
28433 name += skip;
28434
28435 return name;
28436 }
28437
28438 /* If there is a '*' anywhere in the name's prefix, then
28439 emit the stripped name verbatim, otherwise prepend an
28440 underscore if leading underscores are being used. */
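/* For example (assuming '*' is one of the prefixes recognised by
   ARM_NAME_ENCODING_LENGTHS, as it normally is): "*foo" is emitted verbatim
   as "foo", whereas a plain encoded name is emitted through %U and so picks
   up the user label prefix (an underscore on targets that use one).  */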
28441 void
28442 arm_asm_output_labelref (FILE *stream, const char *name)
28443 {
28444 int skip;
28445 int verbatim = 0;
28446
28447 while ((skip = arm_get_strip_length (* name)))
28448 {
28449 verbatim |= (*name == '*');
28450 name += skip;
28451 }
28452
28453 if (verbatim)
28454 fputs (name, stream);
28455 else
28456 asm_fprintf (stream, "%U%s", name);
28457 }
28458
28459 /* This function is used to emit an EABI tag and its associated value.
28460 We emit the numerical value of the tag in case the assembler does not
28461 support textual tags. (E.g. gas prior to 2.20.) If requested we include
28462 the tag name in a comment so that anyone reading the assembler output
28463 will know which tag is being set.
28464
28465 This function is not static because arm-c.cc needs it too. */
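/* For example, arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1) emits
   "\t.eabi_attribute 28, 1", followed under -fverbose-asm or -dA by
   "\t@ Tag_ABI_VFP_args" (with '@' being the usual ARM ASM_COMMENT_START).  */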
28466
28467 void
28468 arm_emit_eabi_attribute (const char *name, int num, int val)
28469 {
28470 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28471 if (flag_verbose_asm || flag_debug_asm)
28472 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28473 asm_fprintf (asm_out_file, "\n");
28474 }
28475
28476 /* This function is used to print CPU tuning information as comment
28477 in assembler file. Pointers are not printed for now. */
28478
28479 void
28480 arm_print_tune_info (void)
28481 {
28482 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28483 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28484 current_tune->constant_limit);
28485 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28486 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28487 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28488 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28489 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28490 "prefetch.l1_cache_size:\t%d\n",
28491 current_tune->prefetch.l1_cache_size);
28492 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28493 "prefetch.l1_cache_line_size:\t%d\n",
28494 current_tune->prefetch.l1_cache_line_size);
28495 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28496 "prefer_constant_pool:\t%d\n",
28497 (int) current_tune->prefer_constant_pool);
28498 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28499 "branch_cost:\t(s:speed, p:predictable)\n");
28500 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28501 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28502 current_tune->branch_cost (false, false));
28503 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28504 current_tune->branch_cost (false, true));
28505 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28506 current_tune->branch_cost (true, false));
28507 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28508 current_tune->branch_cost (true, true));
28509 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28510 "prefer_ldrd_strd:\t%d\n",
28511 (int) current_tune->prefer_ldrd_strd);
28512 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28513 "logical_op_non_short_circuit:\t[%d,%d]\n",
28514 (int) current_tune->logical_op_non_short_circuit_thumb,
28515 (int) current_tune->logical_op_non_short_circuit_arm);
28516 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28517 "disparage_flag_setting_t16_encodings:\t%d\n",
28518 (int) current_tune->disparage_flag_setting_t16_encodings);
28519 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28520 "string_ops_prefer_neon:\t%d\n",
28521 (int) current_tune->string_ops_prefer_neon);
28522 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28523 "max_insns_inline_memset:\t%d\n",
28524 current_tune->max_insns_inline_memset);
28525 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28526 current_tune->fusible_ops);
28527 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28528 (int) current_tune->sched_autopref);
28529 }
28530
28531 /* The last set of target options used to emit .arch directives, etc. This
28532 could be a function-local static if it were not required to expose it as a
28533 root to the garbage collector. */
28534 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28535
28536 /* Print .arch and .arch_extension directives corresponding to the
28537 current architecture configuration. */
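/* A sketch of the kind of output this produces; the exact names depend on
   the selected CPU, FPU and ISA bits, so the ones below are illustrative:

	.cpu cortex-a53
	.arch armv8-a
	.fpu crypto-neon-fp-armv8
	.arch_extension crc  */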
28538 static void
28539 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28540 {
28541 arm_build_target build_target;
28542 /* If the target options haven't changed since the last time we were called
28543 there is nothing to do. This should be sufficient to suppress the
28544 majority of redundant work. */
28545 if (last_asm_targ_options == targ_options)
28546 return;
28547
28548 last_asm_targ_options = targ_options;
28549
28550 build_target.isa = sbitmap_alloc (isa_num_bits);
28551 arm_configure_build_target (&build_target, targ_options, false);
28552
28553 if (build_target.core_name
28554 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28555 {
28556 const char* truncated_name
28557 = arm_rewrite_selected_cpu (build_target.core_name);
28558 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28559 }
28560
28561 const arch_option *arch
28562 = arm_parse_arch_option_name (all_architectures, "-march",
28563 build_target.arch_name);
28564 auto_sbitmap opt_bits (isa_num_bits);
28565
28566 gcc_assert (arch);
28567
28568 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28569 {
28570 /* Keep backward compatibility for assemblers which don't support
28571 armv7ve. Fortunately, none of the following extensions are reset
28572 by a .fpu directive. */
28573 asm_fprintf (stream, "\t.arch armv7-a\n");
28574 asm_fprintf (stream, "\t.arch_extension virt\n");
28575 asm_fprintf (stream, "\t.arch_extension idiv\n");
28576 asm_fprintf (stream, "\t.arch_extension sec\n");
28577 asm_fprintf (stream, "\t.arch_extension mp\n");
28578 }
28579 else
28580 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28581
28582 /* The .fpu directive will reset any architecture extensions from the
28583 assembler that relate to the fp/vector extensions. So put this out before
28584 any .arch_extension directives. */
28585 const char *fpu_name = (TARGET_SOFT_FLOAT
28586 ? "softvfp"
28587 : arm_identify_fpu_from_isa (build_target.isa));
28588 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28589
28590 if (!arch->common.extensions)
28591 return;
28592
28593 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28594 opt->name != NULL;
28595 opt++)
28596 {
28597 if (!opt->remove)
28598 {
28599 arm_initialize_isa (opt_bits, opt->isa_bits);
28600
28601 /* For "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28602 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28603 floating point instructions are disabled. The following check
28604 therefore restricts the printing of ".arch_extension mve" and
28605 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28606 this special behaviour because the feature bits "mve" and
28607 "mve_float" are not part of the "fpu bits", so they are not cleared
28608 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE
28609 and TARGET_HAVE_MVE_FLOAT are disabled. */
28610 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28611 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28612 && !TARGET_HAVE_MVE_FLOAT))
28613 continue;
28614
28615 /* If every feature bit of this option is set in the target ISA
28616 specification, print out the option name. However, don't print
28617 anything if all the bits are part of the FPU specification. */
28618 if (bitmap_subset_p (opt_bits, build_target.isa)
28619 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28620 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28621 }
28622 }
28623 }
28624
28625 static void
28626 arm_file_start (void)
28627 {
28628 int val;
28629 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28630 bool bti = (aarch_enable_bti == 1);
28631
28632 arm_print_asm_arch_directives
28633 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28634
28635 if (TARGET_BPABI)
28636 {
28637 /* If we have a named cpu, but the assembler does not support that
28638 name via .cpu, put out a cpu name attribute; but don't do this if the
28639 name starts with the fictitious prefix, 'generic'. */
28640 if (arm_active_target.core_name
28641 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28642 && !startswith (arm_active_target.core_name, "generic"))
28643 {
28644 const char* truncated_name
28645 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28646 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28647 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28648 truncated_name);
28649 }
28650
28651 if (print_tune_info)
28652 arm_print_tune_info ();
28653
28654 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28655 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28656
28657 if (TARGET_HARD_FLOAT_ABI)
28658 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28659
28660 /* Some of these attributes only apply when the corresponding features
28661 are used. However we don't have any easy way of figuring this out.
28662 Conservatively record the setting that would have been used. */
28663
28664 if (flag_rounding_math)
28665 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28666
28667 if (!flag_unsafe_math_optimizations)
28668 {
28669 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28670 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28671 }
28672 if (flag_signaling_nans)
28673 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28674
28675 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28676 flag_finite_math_only ? 1 : 3);
28677
28678 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28679 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28680 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28681 flag_short_enums ? 1 : 2);
28682
28683 /* Tag_ABI_optimization_goals. */
28684 if (optimize_size)
28685 val = 4;
28686 else if (optimize >= 2)
28687 val = 2;
28688 else if (optimize)
28689 val = 1;
28690 else
28691 val = 6;
28692 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28693
28694 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28695 unaligned_access);
28696
28697 if (arm_fp16_format)
28698 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28699 (int) arm_fp16_format);
28700
28701 if (TARGET_HAVE_PACBTI)
28702 {
28703 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28704 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28705 }
28706 else if (pac || bti)
28707 {
28708 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28709 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28710 }
28711
28712 if (bti)
28713 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28714 if (pac)
28715 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28716
28717 if (arm_lang_output_object_attributes_hook)
28718 arm_lang_output_object_attributes_hook();
28719 }
28720
28721 default_file_start ();
28722 }
28723
28724 static void
28725 arm_file_end (void)
28726 {
28727 int regno;
28728
28729 /* Just in case the last function output in the assembler had non-default
28730 architecture directives, we force the assembler state back to the default
28731 set, so that any 'calculated' build attributes are based on the default
28732 options rather than the special options for that function. */
28733 arm_print_asm_arch_directives
28734 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28735
28736 if (NEED_INDICATE_EXEC_STACK)
28737 /* Add .note.GNU-stack. */
28738 file_end_indicate_exec_stack ();
28739
28740 if (! thumb_call_reg_needed)
28741 return;
28742
28743 switch_to_section (text_section);
28744 asm_fprintf (asm_out_file, "\t.code 16\n");
28745 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28746
28747 for (regno = 0; regno < LR_REGNUM; regno++)
28748 {
28749 rtx label = thumb_call_via_label[regno];
28750
28751 if (label != 0)
28752 {
28753 targetm.asm_out.internal_label (asm_out_file, "L",
28754 CODE_LABEL_NUMBER (label));
28755 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28756 }
28757 }
28758 }
28759
28760 #ifndef ARM_PE
28761 /* Symbols in the text segment can be accessed without indirecting via the
28762 constant pool; it may take an extra binary operation, but this is still
28763 faster than indirecting via memory. Don't do this when not optimizing,
28764 since we won't be calculating all of the offsets necessary to do this
28765 simplification. */
28766
28767 static void
28768 arm_encode_section_info (tree decl, rtx rtl, int first)
28769 {
28770 if (optimize > 0 && TREE_CONSTANT (decl))
28771 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28772
28773 default_encode_section_info (decl, rtl, first);
28774 }
28775 #endif /* !ARM_PE */
28776
28777 static void
28778 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28779 {
28780 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28781 && !strcmp (prefix, "L"))
28782 {
28783 arm_ccfsm_state = 0;
28784 arm_target_insn = NULL;
28785 }
28786 default_internal_label (stream, prefix, labelno);
28787 }
28788
28789 /* Define classes to generate code as RTL or output asm to a file.
28790 Using templates then allows us to use the same code to output code
28791 sequences in the two formats. */
28792 class thumb1_const_rtl
28793 {
28794 public:
28795 thumb1_const_rtl (rtx dst) : dst (dst) {}
28796
28797 void mov (HOST_WIDE_INT val)
28798 {
28799 emit_set_insn (dst, GEN_INT (val));
28800 }
28801
28802 void add (HOST_WIDE_INT val)
28803 {
28804 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28805 }
28806
28807 void ashift (HOST_WIDE_INT shift)
28808 {
28809 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28810 }
28811
28812 void neg ()
28813 {
28814 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28815 }
28816
28817 private:
28818 rtx dst;
28819 };
28820
28821 class thumb1_const_print
28822 {
28823 public:
28824 thumb1_const_print (FILE *f, int regno)
28825 {
28826 t_file = f;
28827 dst_regname = reg_names[regno];
28828 }
28829
28830 void mov (HOST_WIDE_INT val)
28831 {
28832 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28833 dst_regname, val);
28834 }
28835
28836 void add (HOST_WIDE_INT val)
28837 {
28838 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28839 dst_regname, val);
28840 }
28841
28842 void ashift (HOST_WIDE_INT shift)
28843 {
28844 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28845 dst_regname, shift);
28846 }
28847
28848 void neg ()
28849 {
28850 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28851 }
28852
28853 private:
28854 FILE *t_file;
28855 const char *dst_regname;
28856 };
28857
28858 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28859 Avoid generating useless code when one of the bytes is zero. */
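/* A worked example of the general case handled below: for 0x12345678 the
   printed form of the sequence (destination register illustrative) is
	movs	r3, #18
	lsls	r3, #8
	adds	r3, #52
	lsls	r3, #8
	adds	r3, #86
	lsls	r3, #8
	adds	r3, #120
   where 18, 52, 86 and 120 are the bytes 0x12, 0x34, 0x56 and 0x78 in
   decimal; this is the worst case of 1 movs, 3 lsls and 3 adds.  */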
28860 template <class T>
28861 void
28862 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28863 {
28864 bool mov_done_p = false;
28865 unsigned HOST_WIDE_INT val = op1;
28866 int shift = 0;
28867 int i;
28868
28869 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28870
28871 if (val <= 255)
28872 {
28873 dst.mov (val);
28874 return;
28875 }
28876
28877 /* For negative numbers with the top nine bits set, build the
28878 negation of OP1 and then negate the result; this is generally
28879 no longer and often shorter. */
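  /* E.g. -10 is emitted as "movs rN, #10" followed by "rsbs rN, #0"
     (register name illustrative) instead of a byte-by-byte build.  */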
28880 if ((val & 0xFF800000) == 0xFF800000)
28881 {
28882 thumb1_gen_const_int_1 (dst, -op1);
28883 dst.neg ();
28884 return;
28885 }
28886
28887 /* In the general case, we need 7 instructions to build
28888 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28889 do better if VAL is small enough, or right-shiftable
28890 by a suitable amount. If the right shift lets us
28891 encode at least one byte less, it is worth it: we
28892 save an adds and an lsls at the expense of a final
28893 lsls. */
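  /* E.g. 0x44000 needs three bytes as-is but only one after shifting right
     by 14, so it is emitted as "movs rN, #17" then "lsls rN, #14"
     (register name illustrative).  */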
28894 int final_shift = number_of_first_bit_set (val);
28895
28896 int leading_zeroes = clz_hwi (val);
28897 int number_of_bytes_needed
28898 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28899 / BITS_PER_UNIT) + 1;
28900 int number_of_bytes_needed2
28901 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28902 / BITS_PER_UNIT) + 1;
28903
28904 if (number_of_bytes_needed2 < number_of_bytes_needed)
28905 val >>= final_shift;
28906 else
28907 final_shift = 0;
28908
28909 /* If we are in a very small range, we can use either a single movs
28910 or movs+adds. */
28911 if (val <= 510)
28912 {
28913 if (val > 255)
28914 {
28915 unsigned HOST_WIDE_INT high = val - 255;
28916
28917 dst.mov (high);
28918 dst.add (255);
28919 }
28920 else
28921 dst.mov (val);
28922
28923 if (final_shift > 0)
28924 dst.ashift (final_shift);
28925 }
28926 else
28927 {
28928 /* General case, emit upper 3 bytes as needed. */
28929 for (i = 0; i < 3; i++)
28930 {
28931 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28932
28933 if (byte)
28934 {
28935 /* We are about to emit new bits, stop accumulating a
28936 shift amount, and left-shift only if we have already
28937 emitted some upper bits. */
28938 if (mov_done_p)
28939 {
28940 dst.ashift (shift);
28941 dst.add (byte);
28942 }
28943 else
28944 dst.mov (byte);
28945
28946 /* Stop accumulating shift amount since we've just
28947 emitted some bits. */
28948 shift = 0;
28949
28950 mov_done_p = true;
28951 }
28952
28953 if (mov_done_p)
28954 shift += 8;
28955 }
28956
28957 /* Emit lower byte. */
28958 if (!mov_done_p)
28959 dst.mov (val & 0xff);
28960 else
28961 {
28962 dst.ashift (shift);
28963 if (val & 0xff)
28964 dst.add (val & 0xff);
28965 }
28966
28967 if (final_shift > 0)
28968 dst.ashift (final_shift);
28969 }
28970 }
28971
28972 /* Proxies for thumb1.md, since the thumb1_const_print and
28973 thumb1_const_rtl classes are not exported. */
28974 void
28975 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28976 {
28977 thumb1_const_rtl t (dst);
28978 thumb1_gen_const_int_1 (t, op1);
28979 }
28980
28981 void
28982 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28983 {
28984 thumb1_const_print t (asm_out_file, REGNO (dst));
28985 thumb1_gen_const_int_1 (t, op1);
28986 }
28987
28988 /* Output code to add DELTA to the first argument, and then jump
28989 to FUNCTION. Used for C++ multiple inheritance. */
28990
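/* A rough sketch of the sequence emitted below for a Thumb-1-only, non-PIC,
   non-pure-code thunk with a small DELTA; the label name and delta value
   are illustrative:

	push	{r3}
	ldr	r3, .LTHUMBFUNC0	@ target address from the literal pool
	mov	r12, r3
	adds	r0, r0, #8		@ apply DELTA to 'this'
	pop	{r3}
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	<target function>  */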
28991 static void
28992 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28993 HOST_WIDE_INT, tree function)
28994 {
28995 static int thunk_label = 0;
28996 char label[256];
28997 char labelpc[256];
28998 int mi_delta = delta;
28999 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
29000 int shift = 0;
29001 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29002 ? 1 : 0);
29003 if (mi_delta < 0)
29004 mi_delta = - mi_delta;
29005
29006 final_start_function (emit_barrier (), file, 1);
29007
29008 if (TARGET_THUMB1)
29009 {
29010 int labelno = thunk_label++;
29011 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29012 /* Thunks are entered in arm mode when available. */
29013 if (TARGET_THUMB1_ONLY)
29014 {
29015 /* push r3 so we can use it as a temporary. */
29016 /* TODO: Omit this save if r3 is not used. */
29017 fputs ("\tpush {r3}\n", file);
29018
29019 /* With -mpure-code, we cannot load the address from the
29020 constant pool: we build it explicitly. */
29021 if (target_pure_code)
29022 {
29023 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29024 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29025 fputc ('\n', file);
29026 fputs ("\tlsls r3, #8\n", file);
29027 fputs ("\tadds\tr3, #:upper0_7:#", file);
29028 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29029 fputc ('\n', file);
29030 fputs ("\tlsls r3, #8\n", file);
29031 fputs ("\tadds\tr3, #:lower8_15:#", file);
29032 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29033 fputc ('\n', file);
29034 fputs ("\tlsls r3, #8\n", file);
29035 fputs ("\tadds\tr3, #:lower0_7:#", file);
29036 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29037 fputc ('\n', file);
29038 }
29039 else
29040 fputs ("\tldr\tr3, ", file);
29041 }
29042 else
29043 {
29044 fputs ("\tldr\tr12, ", file);
29045 }
29046
29047 if (!target_pure_code)
29048 {
29049 assemble_name (file, label);
29050 fputc ('\n', file);
29051 }
29052
29053 if (flag_pic)
29054 {
29055 /* If we are generating PIC, the ldr instruction below loads
29056 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29057 the address of the add + 8, so we have:
29058
29059 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29060 = target + 1.
29061
29062 Note that we have "+ 1" because some versions of GNU ld
29063 don't set the low bit of the result for R_ARM_REL32
29064 relocations against thumb function symbols.
29065 On ARMv6M this is +4, not +8. */
29066 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29067 assemble_name (file, labelpc);
29068 fputs (":\n", file);
29069 if (TARGET_THUMB1_ONLY)
29070 {
29071 /* This is 2 insns after the start of the thunk, so we know it
29072 is 4-byte aligned. */
29073 fputs ("\tadd\tr3, pc, r3\n", file);
29074 fputs ("\tmov r12, r3\n", file);
29075 }
29076 else
29077 fputs ("\tadd\tr12, pc, r12\n", file);
29078 }
29079 else if (TARGET_THUMB1_ONLY)
29080 fputs ("\tmov r12, r3\n", file);
29081 }
29082 if (TARGET_THUMB1_ONLY)
29083 {
29084 if (mi_delta > 255)
29085 {
29086 /* With -mpure-code, we cannot load MI_DELTA from the
29087 constant pool: we build it explicitly. */
29088 if (target_pure_code)
29089 {
29090 thumb1_const_print r3 (file, 3);
29091 thumb1_gen_const_int_1 (r3, mi_delta);
29092 }
29093 else
29094 {
29095 fputs ("\tldr\tr3, ", file);
29096 assemble_name (file, label);
29097 fputs ("+4\n", file);
29098 }
29099 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29100 mi_op, this_regno, this_regno);
29101 }
29102 else if (mi_delta != 0)
29103 {
29104 /* Thumb1 unified syntax requires an 's' suffix in the instruction
29105 name when one of the operands is an immediate. */
29106 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29107 mi_op, this_regno, this_regno,
29108 mi_delta);
29109 }
29110 }
29111 else
29112 {
29113 /* TODO: Use movw/movt for large constants when available. */
29114 while (mi_delta != 0)
29115 {
29116 if ((mi_delta & (3 << shift)) == 0)
29117 shift += 2;
29118 else
29119 {
29120 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29121 mi_op, this_regno, this_regno,
29122 mi_delta & (0xff << shift));
29123 mi_delta &= ~(0xff << shift);
29124 shift += 8;
29125 }
29126 }
29127 }
29128 if (TARGET_THUMB1)
29129 {
29130 if (TARGET_THUMB1_ONLY)
29131 fputs ("\tpop\t{r3}\n", file);
29132
29133 fprintf (file, "\tbx\tr12\n");
29134
29135 /* With -mpure-code, we don't need to emit literals for the
29136 function address and delta since we emitted code to build
29137 them. */
29138 if (!target_pure_code)
29139 {
29140 ASM_OUTPUT_ALIGN (file, 2);
29141 assemble_name (file, label);
29142 fputs (":\n", file);
29143 if (flag_pic)
29144 {
29145 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29146 rtx tem = XEXP (DECL_RTL (function), 0);
29147 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29148 pipeline offset is four rather than eight. Adjust the offset
29149 accordingly. */
29150 tem = plus_constant (GET_MODE (tem), tem,
29151 TARGET_THUMB1_ONLY ? -3 : -7);
29152 tem = gen_rtx_MINUS (GET_MODE (tem),
29153 tem,
29154 gen_rtx_SYMBOL_REF (Pmode,
29155 ggc_strdup (labelpc)));
29156 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29157 }
29158 else
29159 /* Output ".word .LTHUNKn". */
29160 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29161
29162 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29163 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29164 }
29165 }
29166 else
29167 {
29168 fputs ("\tb\t", file);
29169 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29170 if (NEED_PLT_RELOC)
29171 fputs ("(PLT)", file);
29172 fputc ('\n', file);
29173 }
29174
29175 final_end_function ();
29176 }
29177
29178 /* MI thunk handling for TARGET_32BIT. */
29179
29180 static void
29181 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29182 HOST_WIDE_INT vcall_offset, tree function)
29183 {
29184 const bool long_call_p = arm_is_long_call_p (function);
29185
29186 /* On ARM, this_regno is R0 or R1 depending on
29187 whether the function returns an aggregate or not.
29188 */
29189 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29190 function)
29191 ? R1_REGNUM : R0_REGNUM);
29192
29193 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29194 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29195 reload_completed = 1;
29196 emit_note (NOTE_INSN_PROLOGUE_END);
29197
29198 /* Add DELTA to THIS_RTX. */
29199 if (delta != 0)
29200 arm_split_constant (PLUS, Pmode, NULL_RTX,
29201 delta, this_rtx, this_rtx, false);
29202
29203 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29204 if (vcall_offset != 0)
29205 {
29206 /* Load *THIS_RTX. */
29207 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29208 /* Compute *THIS_RTX + VCALL_OFFSET. */
29209 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29210 false);
29211 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29212 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29213 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29214 }
29215
29216 /* Generate a tail call to the target function. */
29217 if (!TREE_USED (function))
29218 {
29219 assemble_external (function);
29220 TREE_USED (function) = 1;
29221 }
29222 rtx funexp = XEXP (DECL_RTL (function), 0);
29223 if (long_call_p)
29224 {
29225 emit_move_insn (temp, funexp);
29226 funexp = temp;
29227 }
29228 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29229 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29230 SIBLING_CALL_P (insn) = 1;
29231 emit_barrier ();
29232
29233 /* Indirect calls require a bit of fixup in PIC mode. */
29234 if (long_call_p)
29235 {
29236 split_all_insns_noflow ();
29237 arm_reorg ();
29238 }
29239
29240 insn = get_insns ();
29241 shorten_branches (insn);
29242 final_start_function (insn, file, 1);
29243 final (insn, file, 1);
29244 final_end_function ();
29245
29246 /* Stop pretending this is a post-reload pass. */
29247 reload_completed = 0;
29248 }
29249
29250 /* Output code to add DELTA to the first argument, and then jump
29251 to FUNCTION. Used for C++ multiple inheritance. */
29252
29253 static void
29254 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29255 HOST_WIDE_INT vcall_offset, tree function)
29256 {
29257 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29258
29259 assemble_start_function (thunk, fnname);
29260 if (TARGET_32BIT)
29261 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29262 else
29263 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29264 assemble_end_function (thunk, fnname);
29265 }
29266
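/* Output a vector constant as a single hexadecimal literal, printing the
   last element first.  For example, a V4HImode constant {1, 2, 3, 4} is
   emitted as 0x0004000300020001 (a worked example of the loop below).  */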
29267 int
29268 arm_emit_vector_const (FILE *file, rtx x)
29269 {
29270 int i;
29271 const char * pattern;
29272
29273 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29274
29275 switch (GET_MODE (x))
29276 {
29277 case E_V2SImode: pattern = "%08x"; break;
29278 case E_V4HImode: pattern = "%04x"; break;
29279 case E_V8QImode: pattern = "%02x"; break;
29280 default: gcc_unreachable ();
29281 }
29282
29283 fprintf (file, "0x");
29284 for (i = CONST_VECTOR_NUNITS (x); i--;)
29285 {
29286 rtx element;
29287
29288 element = CONST_VECTOR_ELT (x, i);
29289 fprintf (file, pattern, INTVAL (element));
29290 }
29291
29292 return 1;
29293 }
29294
29295 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29296 HFmode constant pool entries are actually loaded with ldr. */
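/* For example, HFmode 1.0 has the bit pattern 0x3c00; on a little-endian
   target the two data bytes of 0x3c00 are emitted first, followed by two
   bytes of zero padding (the exact directives depend on how the target
   implements assemble_integer and assemble_zeros).  */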
29297 void
29298 arm_emit_fp16_const (rtx c)
29299 {
29300 long bits;
29301
29302 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29303 if (WORDS_BIG_ENDIAN)
29304 assemble_zeros (2);
29305 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29306 if (!WORDS_BIG_ENDIAN)
29307 assemble_zeros (2);
29308 }
29309
29310 const char *
29311 arm_output_load_gr (rtx *operands)
29312 {
29313 rtx reg;
29314 rtx offset;
29315 rtx wcgr;
29316 rtx sum;
29317
29318 if (!MEM_P (operands [1])
29319 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29320 || !REG_P (reg = XEXP (sum, 0))
29321 || !CONST_INT_P (offset = XEXP (sum, 1))
29322 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29323 return "wldrw%?\t%0, %1";
29324
29325 /* Fix up an out-of-range load of a GR register. */
29326 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29327 wcgr = operands[0];
29328 operands[0] = reg;
29329 output_asm_insn ("ldr%?\t%0, %1", operands);
29330
29331 operands[0] = wcgr;
29332 operands[1] = reg;
29333 output_asm_insn ("tmcr%?\t%0, %1", operands);
29334 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29335
29336 return "";
29337 }
29338
29339 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29340
29341 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29342 named arg and all anonymous args onto the stack.
29343 XXX I know the prologue shouldn't be pushing registers, but it is faster
29344 that way. */
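/* For example, under AAPCS a variadic function whose only named argument
   is an "int" uses just r0 for the named part, so nregs is 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, i.e. the prologue pushes r1-r3
   (assuming no doubleword-alignment adjustment applies).  */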
29345
29346 static void
29347 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29348 const function_arg_info &arg,
29349 int *pretend_size,
29350 int second_time ATTRIBUTE_UNUSED)
29351 {
29352 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29353 int nregs;
29354
29355 cfun->machine->uses_anonymous_args = 1;
29356 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29357 {
29358 nregs = pcum->aapcs_ncrn;
29359 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29360 && (nregs & 1))
29361 {
29362 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29363 if (res < 0 && warn_psabi)
29364 inform (input_location, "parameter passing for argument of "
29365 "type %qT changed in GCC 7.1", arg.type);
29366 else if (res > 0)
29367 {
29368 nregs++;
29369 if (res > 1 && warn_psabi)
29370 inform (input_location,
29371 "parameter passing for argument of type "
29372 "%qT changed in GCC 9.1", arg.type);
29373 }
29374 }
29375 }
29376 else
29377 nregs = pcum->nregs;
29378
29379 if (nregs < NUM_ARG_REGS)
29380 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29381 }
29382
29383 /* We can't rely on the caller doing the proper promotion when
29384 using APCS or ATPCS. */
29385
29386 static bool
29387 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29388 {
29389 return !TARGET_AAPCS_BASED;
29390 }
29391
29392 static machine_mode
29393 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29394 machine_mode mode,
29395 int *punsignedp ATTRIBUTE_UNUSED,
29396 const_tree fntype ATTRIBUTE_UNUSED,
29397 int for_return ATTRIBUTE_UNUSED)
29398 {
29399 if (GET_MODE_CLASS (mode) == MODE_INT
29400 && GET_MODE_SIZE (mode) < 4)
29401 return SImode;
29402
29403 return mode;
29404 }
29405
29406
29407 static bool
29408 arm_default_short_enums (void)
29409 {
29410 return ARM_DEFAULT_SHORT_ENUMS;
29411 }
29412
29413
29414 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29415
29416 static bool
29417 arm_align_anon_bitfield (void)
29418 {
29419 return TARGET_AAPCS_BASED;
29420 }
29421
29422
29423 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29424
29425 static tree
29426 arm_cxx_guard_type (void)
29427 {
29428 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29429 }
29430
29431
29432 /* The EABI says test the least significant bit of a guard variable. */
29433
29434 static bool
29435 arm_cxx_guard_mask_bit (void)
29436 {
29437 return TARGET_AAPCS_BASED;
29438 }
29439
29440
29441 /* The EABI specifies that all array cookies are 8 bytes long. */
29442
29443 static tree
29444 arm_get_cookie_size (tree type)
29445 {
29446 tree size;
29447
29448 if (!TARGET_AAPCS_BASED)
29449 return default_cxx_get_cookie_size (type);
29450
29451 size = build_int_cst (sizetype, 8);
29452 return size;
29453 }
29454
29455
29456 /* The EABI says that array cookies should also contain the element size. */
29457
29458 static bool
29459 arm_cookie_has_size (void)
29460 {
29461 return TARGET_AAPCS_BASED;
29462 }
29463
29464
29465 /* The EABI says constructors and destructors should return a pointer to
29466 the object constructed/destroyed. */
29467
29468 static bool
29469 arm_cxx_cdtor_returns_this (void)
29470 {
29471 return TARGET_AAPCS_BASED;
29472 }
29473
29474 /* The EABI says that an inline function may never be the key
29475 method. */
29476
29477 static bool
29478 arm_cxx_key_method_may_be_inline (void)
29479 {
29480 return !TARGET_AAPCS_BASED;
29481 }
29482
29483 static void
29484 arm_cxx_determine_class_data_visibility (tree decl)
29485 {
29486 if (!TARGET_AAPCS_BASED
29487 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29488 return;
29489
29490 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29491 is exported. However, on systems without dynamic vague linkage,
29492 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29493 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29494 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29495 else
29496 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29497 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29498 }
29499
29500 static bool
29501 arm_cxx_class_data_always_comdat (void)
29502 {
29503 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29504 vague linkage if the class has no key function. */
29505 return !TARGET_AAPCS_BASED;
29506 }
29507
29508
29509 /* The EABI says __aeabi_atexit should be used to register static
29510 destructors. */
29511
29512 static bool
29513 arm_cxx_use_aeabi_atexit (void)
29514 {
29515 return TARGET_AAPCS_BASED;
29516 }
29517
29518
29519 void
29520 arm_set_return_address (rtx source, rtx scratch)
29521 {
29522 arm_stack_offsets *offsets;
29523 HOST_WIDE_INT delta;
29524 rtx addr, mem;
29525 unsigned long saved_regs;
29526
29527 offsets = arm_get_frame_offsets ();
29528 saved_regs = offsets->saved_regs_mask;
29529
29530 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29531 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29532 else
29533 {
29534 if (frame_pointer_needed)
29535 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29536 else
29537 {
29538 /* LR will be the first saved register. */
29539 delta = offsets->outgoing_args - (offsets->frame + 4);
29540
29541
29542 if (delta >= 4096)
29543 {
29544 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29545 GEN_INT (delta & ~4095)));
29546 addr = scratch;
29547 delta &= 4095;
29548 }
29549 else
29550 addr = stack_pointer_rtx;
29551
29552 addr = plus_constant (Pmode, addr, delta);
29553 }
29554
29555 /* The store needs to be marked to prevent DSE from deleting
29556 it as dead if it is based on fp. */
29557 mem = gen_frame_mem (Pmode, addr);
29558 MEM_VOLATILE_P (mem) = true;
29559 emit_move_insn (mem, source);
29560 }
29561 }
29562
29563
29564 void
29565 thumb_set_return_address (rtx source, rtx scratch)
29566 {
29567 arm_stack_offsets *offsets;
29568 HOST_WIDE_INT delta;
29569 HOST_WIDE_INT limit;
29570 int reg;
29571 rtx addr, mem;
29572 unsigned long mask;
29573
29574 emit_use (source);
29575
29576 offsets = arm_get_frame_offsets ();
29577 mask = offsets->saved_regs_mask;
29578 if (mask & (1 << LR_REGNUM))
29579 {
29580 limit = 1024;
29581 /* Find the saved regs. */
29582 if (frame_pointer_needed)
29583 {
29584 delta = offsets->soft_frame - offsets->saved_args;
29585 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29586 if (TARGET_THUMB1)
29587 limit = 128;
29588 }
29589 else
29590 {
29591 delta = offsets->outgoing_args - offsets->saved_args;
29592 reg = SP_REGNUM;
29593 }
29594 /* Allow for the stack frame. */
29595 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29596 delta -= 16;
29597 /* The link register is always the first saved register. */
29598 delta -= 4;
29599
29600 /* Construct the address. */
29601 addr = gen_rtx_REG (SImode, reg);
29602 if (delta > limit)
29603 {
29604 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29605 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29606 addr = scratch;
29607 }
29608 else
29609 addr = plus_constant (Pmode, addr, delta);
29610
29611 /* The store needs to be marked to prevent DSE from deleting
29612 it as dead if it is based on fp. */
29613 mem = gen_frame_mem (Pmode, addr);
29614 MEM_VOLATILE_P (mem) = true;
29615 emit_move_insn (mem, source);
29616 }
29617 else
29618 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29619 }
29620
29621 /* Implements target hook vector_mode_supported_p. */
29622 bool
29623 arm_vector_mode_supported_p (machine_mode mode)
29624 {
29625 /* Neon also supports V2SImode, etc. listed in the clause below. */
29626 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29627 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29628 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29629 || mode == V8BFmode))
29630 return true;
29631
29632 if ((TARGET_NEON || TARGET_IWMMXT)
29633 && ((mode == V2SImode)
29634 || (mode == V4HImode)
29635 || (mode == V8QImode)))
29636 return true;
29637
29638 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29639 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29640 || mode == V2HAmode))
29641 return true;
29642
29643 if (TARGET_HAVE_MVE
29644 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29645 return true;
29646
29647 if (TARGET_HAVE_MVE_FLOAT
29648 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29649 return true;
29650
29651 return false;
29652 }
29653
29654 /* Implements target hook array_mode_supported_p. */
29655
29656 static bool
29657 arm_array_mode_supported_p (machine_mode mode,
29658 unsigned HOST_WIDE_INT nelems)
29659 {
29660 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29661 for now, as the lane-swapping logic needs to be extended in the expanders.
29662 See PR target/82518. */
29663 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29664 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29665 && (nelems >= 2 && nelems <= 4))
29666 return true;
29667
29668 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29669 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29670 return true;
29671
29672 return false;
29673 }
29674
29675 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29676 registers when autovectorizing for Neon, at least until multiple vector
29677 widths are supported properly by the middle-end. */
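/* For example, with Neon enabled SFmode data is vectorized as V4SFmode by
   default and as V2SFmode under -mvectorize-with-neon-double, while MVE
   always selects the 128-bit modes (V16QImode, V8HImode, V4SImode and,
   with MVE floating point, V8HFmode/V4SFmode).  */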
29678
29679 static machine_mode
29680 arm_preferred_simd_mode (scalar_mode mode)
29681 {
29682 if (TARGET_NEON)
29683 switch (mode)
29684 {
29685 case E_HFmode:
29686 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29687 case E_SFmode:
29688 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29689 case E_SImode:
29690 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29691 case E_HImode:
29692 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29693 case E_QImode:
29694 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29695 case E_DImode:
29696 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29697 return V2DImode;
29698 break;
29699
29700 default:;
29701 }
29702
29703 if (TARGET_REALLY_IWMMXT)
29704 switch (mode)
29705 {
29706 case E_SImode:
29707 return V2SImode;
29708 case E_HImode:
29709 return V4HImode;
29710 case E_QImode:
29711 return V8QImode;
29712
29713 default:;
29714 }
29715
29716 if (TARGET_HAVE_MVE)
29717 switch (mode)
29718 {
29719 case E_QImode:
29720 return V16QImode;
29721 case E_HImode:
29722 return V8HImode;
29723 case E_SImode:
29724 return V4SImode;
29725
29726 default:;
29727 }
29728
29729 if (TARGET_HAVE_MVE_FLOAT)
29730 switch (mode)
29731 {
29732 case E_HFmode:
29733 return V8HFmode;
29734 case E_SFmode:
29735 return V4SFmode;
29736
29737 default:;
29738 }
29739
29740 return word_mode;
29741 }
29742
29743 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29744
29745 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29746 using r0-r4 for function arguments and r7 for the stack frame, without
29747 enough registers left over to do doubleword arithmetic. For Thumb-2 all the
29748 potentially problematic instructions accept high registers so this is not
29749 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29750 that require many low registers. */
29751 static bool
29752 arm_class_likely_spilled_p (reg_class_t rclass)
29753 {
29754 if ((TARGET_THUMB1 && rclass == LO_REGS)
29755 || rclass == CC_REG)
29756 return true;
29757
29758 return default_class_likely_spilled_p (rclass);
29759 }
29760
29761 /* Implements target hook small_register_classes_for_mode_p. */
29762 bool
29763 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29764 {
29765 return TARGET_THUMB1;
29766 }
29767
29768 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29769 ARM insns and therefore guarantee that the shift count is modulo 256.
29770 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29771 guarantee no particular behavior for out-of-range counts. */
29772
29773 static unsigned HOST_WIDE_INT
29774 arm_shift_truncation_mask (machine_mode mode)
29775 {
29776 return mode == SImode ? 255 : 0;
29777 }
29778
29779
29780 /* Map internal gcc register numbers to DWARF2 register numbers. */
29781
29782 unsigned int
29783 arm_debugger_regno (unsigned int regno)
29784 {
29785 if (regno < 16)
29786 return regno;
29787
29788 if (IS_VFP_REGNUM (regno))
29789 {
29790 /* See comment in arm_dwarf_register_span. */
29791 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29792 return 64 + regno - FIRST_VFP_REGNUM;
29793 else
29794 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29795 }
29796
29797 if (IS_IWMMXT_GR_REGNUM (regno))
29798 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29799
29800 if (IS_IWMMXT_REGNUM (regno))
29801 return 112 + regno - FIRST_IWMMXT_REGNUM;
29802
29803 if (IS_PAC_REGNUM (regno))
29804 return DWARF_PAC_REGNUM;
29805
29806 return DWARF_FRAME_REGISTERS;
29807 }
29808
29809 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29810 GCC models them as 64 32-bit registers, so we need to describe this to
29811 the DWARF generation code. Other registers can use the default. */
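/* For example, a DFmode value held in d5 (which overlaps s10/s11) is
   described as a PARALLEL of the two SImode registers s10 and s11 (in the
   opposite order for big-endian), while a value in d16-d31 is described
   with DImode pieces.  */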
29812 static rtx
29813 arm_dwarf_register_span (rtx rtl)
29814 {
29815 machine_mode mode;
29816 unsigned regno;
29817 rtx parts[16];
29818 int nregs;
29819 int i;
29820
29821 regno = REGNO (rtl);
29822 if (!IS_VFP_REGNUM (regno))
29823 return NULL_RTX;
29824
29825 /* XXX FIXME: The EABI defines two VFP register ranges:
29826 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29827 256-287: D0-D31
29828 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29829 corresponding D register. Until GDB supports this, we shall use the
29830 legacy encodings. We also use these encodings for D0-D15 for
29831 compatibility with older debuggers. */
29832 mode = GET_MODE (rtl);
29833 if (GET_MODE_SIZE (mode) < 8)
29834 return NULL_RTX;
29835
29836 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29837 {
29838 nregs = GET_MODE_SIZE (mode) / 4;
29839 for (i = 0; i < nregs; i += 2)
29840 if (TARGET_BIG_END)
29841 {
29842 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29843 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29844 }
29845 else
29846 {
29847 parts[i] = gen_rtx_REG (SImode, regno + i);
29848 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29849 }
29850 }
29851 else
29852 {
29853 nregs = GET_MODE_SIZE (mode) / 8;
29854 for (i = 0; i < nregs; i++)
29855 parts[i] = gen_rtx_REG (DImode, regno + i);
29856 }
29857
29858 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29859 }
29860
29861 #if ARM_UNWIND_INFO
29862 /* Emit unwind directives for a store-multiple instruction or stack pointer
29863 push during alignment.
29864 These should only ever be generated by the function prologue code, so
29865 expect them to have a particular form.
29866 The store-multiple instruction sometimes pushes pc as the last register,
29867 although it should not be tracked into unwind information, or for -Os
29868 sometimes pushes some dummy registers before the first register that needs
29869 to be tracked in unwind information; such dummy registers are there just
29870 to avoid separate stack adjustment, and will not be restored in the
29871 epilogue. */
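/* For example, a prologue "push {r4, r5, lr}" (a PARALLEL that first drops
   sp by 12 and then stores the three registers) is annotated as
   ".save {r4, r5, lr}"; a VFP store-multiple becomes ".vsave {d8, d9}",
   and a dummy pc/padding slot turns into ".pad #4".  */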
29872
29873 static void
29874 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29875 {
29876 int i;
29877 HOST_WIDE_INT offset;
29878 HOST_WIDE_INT nregs;
29879 int reg_size;
29880 unsigned reg;
29881 unsigned lastreg;
29882 unsigned padfirst = 0, padlast = 0;
29883 rtx e;
29884
29885 e = XVECEXP (p, 0, 0);
29886 gcc_assert (GET_CODE (e) == SET);
29887
29888 /* First insn will adjust the stack pointer. */
29889 gcc_assert (GET_CODE (e) == SET
29890 && REG_P (SET_DEST (e))
29891 && REGNO (SET_DEST (e)) == SP_REGNUM
29892 && GET_CODE (SET_SRC (e)) == PLUS);
29893
29894 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29895 nregs = XVECLEN (p, 0) - 1;
29896 gcc_assert (nregs);
29897
29898 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29899 if (reg < 16 || IS_PAC_REGNUM (reg))
29900 {
29901 /* For -Os dummy registers can be pushed at the beginning to
29902 avoid separate stack pointer adjustment. */
29903 e = XVECEXP (p, 0, 1);
29904 e = XEXP (SET_DEST (e), 0);
29905 if (GET_CODE (e) == PLUS)
29906 padfirst = INTVAL (XEXP (e, 1));
29907 gcc_assert (padfirst == 0 || optimize_size);
29908 /* The function prologue may also push pc, but not annotate it as it is
29909 never restored. We turn this into a stack pointer adjustment. */
29910 e = XVECEXP (p, 0, nregs);
29911 e = XEXP (SET_DEST (e), 0);
29912 if (GET_CODE (e) == PLUS)
29913 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29914 else
29915 padlast = offset - 4;
29916 gcc_assert (padlast == 0 || padlast == 4);
29917 if (padlast == 4)
29918 fprintf (out_file, "\t.pad #4\n");
29919 reg_size = 4;
29920 fprintf (out_file, "\t.save {");
29921 }
29922 else if (IS_VFP_REGNUM (reg))
29923 {
29924 reg_size = 8;
29925 fprintf (out_file, "\t.vsave {");
29926 }
29927 else
29928 /* Unknown register type. */
29929 gcc_unreachable ();
29930
29931 /* If the stack increment doesn't match the size of the saved registers,
29932 something has gone horribly wrong. */
29933 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29934
29935 offset = padfirst;
29936 lastreg = 0;
29937 /* The remaining insns will describe the stores. */
29938 for (i = 1; i <= nregs; i++)
29939 {
29940 /* Expect (set (mem <addr>) (reg)).
29941 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29942 e = XVECEXP (p, 0, i);
29943 gcc_assert (GET_CODE (e) == SET
29944 && MEM_P (SET_DEST (e))
29945 && REG_P (SET_SRC (e)));
29946
29947 reg = REGNO (SET_SRC (e));
29948 gcc_assert (reg >= lastreg);
29949
29950 if (i != 1)
29951 fprintf (out_file, ", ");
29952 /* We can't use %r for vfp because we need to use the
29953 double precision register names. */
29954 if (IS_VFP_REGNUM (reg))
29955 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29956 else if (IS_PAC_REGNUM (reg))
29957 asm_fprintf (asm_out_file, "ra_auth_code");
29958 else
29959 asm_fprintf (out_file, "%r", reg);
29960
29961 if (flag_checking)
29962 {
29963 /* Check that the addresses are consecutive. */
29964 e = XEXP (SET_DEST (e), 0);
29965 if (GET_CODE (e) == PLUS)
29966 gcc_assert (REG_P (XEXP (e, 0))
29967 && REGNO (XEXP (e, 0)) == SP_REGNUM
29968 && CONST_INT_P (XEXP (e, 1))
29969 && offset == INTVAL (XEXP (e, 1)));
29970 else
29971 gcc_assert (i == 1
29972 && REG_P (e)
29973 && REGNO (e) == SP_REGNUM);
29974 offset += reg_size;
29975 }
29976 }
29977 fprintf (out_file, "}\n");
29978 if (padfirst)
29979 fprintf (out_file, "\t.pad #%d\n", padfirst);
29980 }
29981
29982 /* Emit unwind directives for a SET. */
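/* For example, "sp = sp - 16" becomes ".pad #16"; setting up the frame
   pointer as "fp = sp + 8" becomes ".setfp r7, sp, #8" in Thumb state (the
   frame pointer register name differs in ARM state); and a single-register
   push via a pre-decrement store becomes ".save {rN}" (".save {dN}" for a
   VFP register).  */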
29983
29984 static void
29985 arm_unwind_emit_set (FILE * out_file, rtx p)
29986 {
29987 rtx e0;
29988 rtx e1;
29989 unsigned reg;
29990
29991 e0 = XEXP (p, 0);
29992 e1 = XEXP (p, 1);
29993 switch (GET_CODE (e0))
29994 {
29995 case MEM:
29996 /* Pushing a single register. */
29997 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29998 || !REG_P (XEXP (XEXP (e0, 0), 0))
29999 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
30000 abort ();
30001
30002 asm_fprintf (out_file, "\t.save ");
30003 if (IS_VFP_REGNUM (REGNO (e1)))
30004 asm_fprintf(out_file, "{d%d}\n",
30005 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30006 else
30007 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30008 break;
30009
30010 case REG:
30011 if (REGNO (e0) == SP_REGNUM)
30012 {
30013 /* A stack increment. */
30014 if (GET_CODE (e1) != PLUS
30015 || !REG_P (XEXP (e1, 0))
30016 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30017 || !CONST_INT_P (XEXP (e1, 1)))
30018 abort ();
30019
30020 asm_fprintf (out_file, "\t.pad #%wd\n",
30021 -INTVAL (XEXP (e1, 1)));
30022 }
30023 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30024 {
30025 HOST_WIDE_INT offset;
30026
30027 if (GET_CODE (e1) == PLUS)
30028 {
30029 if (!REG_P (XEXP (e1, 0))
30030 || !CONST_INT_P (XEXP (e1, 1)))
30031 abort ();
30032 reg = REGNO (XEXP (e1, 0));
30033 offset = INTVAL (XEXP (e1, 1));
30034 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30035 HARD_FRAME_POINTER_REGNUM, reg,
30036 offset);
30037 }
30038 else if (REG_P (e1))
30039 {
30040 reg = REGNO (e1);
30041 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30042 HARD_FRAME_POINTER_REGNUM, reg);
30043 }
30044 else
30045 abort ();
30046 }
30047 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30048 {
30049 /* Move from sp to reg. */
30050 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30051 }
30052 else if (GET_CODE (e1) == PLUS
30053 && REG_P (XEXP (e1, 0))
30054 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30055 && CONST_INT_P (XEXP (e1, 1)))
30056 {
30057 /* Set reg to offset from sp. */
30058 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30059 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30060 }
30061 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30062 {
30063 if (cfun->machine->pacspval_needed)
30064 asm_fprintf (out_file, "\t.pacspval\n");
30065 }
30066 else
30067 abort ();
30068 break;
30069
30070 default:
30071 abort ();
30072 }
30073 }
30074
30075
30076 /* Emit unwind directives for the given insn. */
30077
30078 static void
30079 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30080 {
30081 rtx note, pat;
30082 bool handled_one = false;
30083
30084 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30085 return;
30086
30087 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30088 && (TREE_NOTHROW (current_function_decl)
30089 || crtl->all_throwers_are_sibcalls))
30090 return;
30091
30092 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30093 return;
30094
30095 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30096 {
30097 switch (REG_NOTE_KIND (note))
30098 {
30099 case REG_FRAME_RELATED_EXPR:
30100 pat = XEXP (note, 0);
30101 goto found;
30102
30103 case REG_CFA_REGISTER:
30104 pat = XEXP (note, 0);
30105 if (pat == NULL)
30106 {
30107 pat = PATTERN (insn);
30108 if (GET_CODE (pat) == PARALLEL)
30109 pat = XVECEXP (pat, 0, 0);
30110 }
30111
30112 /* Only emitted for IS_STACKALIGN re-alignment. */
30113 {
30114 rtx dest, src;
30115 unsigned reg;
30116
30117 src = SET_SRC (pat);
30118 dest = SET_DEST (pat);
30119
30120 gcc_assert (src == stack_pointer_rtx
30121 || IS_PAC_REGNUM (REGNO (src)));
30122 reg = REGNO (dest);
30123
30124 if (IS_PAC_REGNUM (REGNO (src)))
30125 arm_unwind_emit_set (out_file, PATTERN (insn));
30126 else
30127 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30128 reg + 0x90, reg);
30129 }
30130 handled_one = true;
30131 break;
30132
30133 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30134 to get correct dwarf information for shrink-wrap. We should not
30135 emit unwind information for it because these notes are used either for
30136 pretend arguments or to adjust sp and restore registers from the
30137 stack. */
30138 case REG_CFA_DEF_CFA:
30139 case REG_CFA_ADJUST_CFA:
30140 case REG_CFA_RESTORE:
30141 return;
30142
30143 case REG_CFA_EXPRESSION:
30144 case REG_CFA_OFFSET:
30145 /* ??? Only handling here what we actually emit. */
30146 gcc_unreachable ();
30147
30148 default:
30149 break;
30150 }
30151 }
30152 if (handled_one)
30153 return;
30154 pat = PATTERN (insn);
30155 found:
30156
30157 switch (GET_CODE (pat))
30158 {
30159 case SET:
30160 arm_unwind_emit_set (out_file, pat);
30161 break;
30162
30163 case SEQUENCE:
30164 /* Store multiple. */
30165 arm_unwind_emit_sequence (out_file, pat);
30166 break;
30167
30168 default:
30169 abort();
30170 }
30171 }
30172
30173
30174 /* Output a reference from a function exception table to the type_info
30175 object X. The EABI specifies that the symbol should be relocated by
30176 an R_ARM_TARGET2 relocation. */
30177
30178 static bool
30179 arm_output_ttype (rtx x)
30180 {
30181 fputs ("\t.word\t", asm_out_file);
30182 output_addr_const (asm_out_file, x);
30183 /* Use special relocations for symbol references. */
30184 if (!CONST_INT_P (x))
30185 fputs ("(TARGET2)", asm_out_file);
30186 fputc ('\n', asm_out_file);
30187
30188 return TRUE;
30189 }
30190
30191 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30192
30193 static void
30194 arm_asm_emit_except_personality (rtx personality)
30195 {
30196 fputs ("\t.personality\t", asm_out_file);
30197 output_addr_const (asm_out_file, personality);
30198 fputc ('\n', asm_out_file);
30199 }
30200 #endif /* ARM_UNWIND_INFO */
30201
30202 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30203
30204 static void
30205 arm_asm_init_sections (void)
30206 {
30207 #if ARM_UNWIND_INFO
30208 exception_section = get_unnamed_section (0, output_section_asm_op,
30209 "\t.handlerdata");
30210 #endif /* ARM_UNWIND_INFO */
30211
30212 #ifdef OBJECT_FORMAT_ELF
30213 if (target_pure_code)
30214 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30215 #endif
30216 }
30217
30218 /* Output unwind directives for the start/end of a function. */
30219
30220 void
30221 arm_output_fn_unwind (FILE * f, bool prologue)
30222 {
30223 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30224 return;
30225
30226 if (prologue)
30227 fputs ("\t.fnstart\n", f);
30228 else
30229 {
30230 /* If this function will never be unwound, then mark it as such.
30231 The same condition is used in arm_unwind_emit to suppress
30232 the frame annotations. */
30233 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30234 && (TREE_NOTHROW (current_function_decl)
30235 || crtl->all_throwers_are_sibcalls))
30236 fputs("\t.cantunwind\n", f);
30237
30238 fputs ("\t.fnend\n", f);
30239 }
30240 }
30241
30242 static bool
30243 arm_emit_tls_decoration (FILE *fp, rtx x)
30244 {
30245 enum tls_reloc reloc;
30246 rtx val;
30247
30248 val = XVECEXP (x, 0, 0);
30249 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30250
30251 output_addr_const (fp, val);
30252
30253 switch (reloc)
30254 {
30255 case TLS_GD32:
30256 fputs ("(tlsgd)", fp);
30257 break;
30258 case TLS_GD32_FDPIC:
30259 fputs ("(tlsgd_fdpic)", fp);
30260 break;
30261 case TLS_LDM32:
30262 fputs ("(tlsldm)", fp);
30263 break;
30264 case TLS_LDM32_FDPIC:
30265 fputs ("(tlsldm_fdpic)", fp);
30266 break;
30267 case TLS_LDO32:
30268 fputs ("(tlsldo)", fp);
30269 break;
30270 case TLS_IE32:
30271 fputs ("(gottpoff)", fp);
30272 break;
30273 case TLS_IE32_FDPIC:
30274 fputs ("(gottpoff_fdpic)", fp);
30275 break;
30276 case TLS_LE32:
30277 fputs ("(tpoff)", fp);
30278 break;
30279 case TLS_DESCSEQ:
30280 fputs ("(tlsdesc)", fp);
30281 break;
30282 default:
30283 gcc_unreachable ();
30284 }
30285
30286 switch (reloc)
30287 {
30288 case TLS_GD32:
30289 case TLS_LDM32:
30290 case TLS_IE32:
30291 case TLS_DESCSEQ:
30292 fputs (" + (. - ", fp);
30293 output_addr_const (fp, XVECEXP (x, 0, 2));
30294 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
30295 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30296 output_addr_const (fp, XVECEXP (x, 0, 3));
30297 fputc (')', fp);
30298 break;
30299 default:
30300 break;
30301 }
30302
30303 return TRUE;
30304 }
30305
30306 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30307
30308 static void
30309 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30310 {
30311 gcc_assert (size == 4);
30312 fputs ("\t.word\t", file);
30313 output_addr_const (file, x);
30314 fputs ("(tlsldo)", file);
30315 }
30316
30317 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30318
30319 static bool
30320 arm_output_addr_const_extra (FILE *fp, rtx x)
30321 {
30322 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30323 return arm_emit_tls_decoration (fp, x);
30324 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30325 {
30326 char label[256];
30327 int labelno = INTVAL (XVECEXP (x, 0, 0));
30328
30329 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30330 assemble_name_raw (fp, label);
30331
30332 return TRUE;
30333 }
30334 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30335 {
30336 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30337 if (GOT_PCREL)
30338 fputs ("+.", fp);
30339 fputs ("-(", fp);
30340 output_addr_const (fp, XVECEXP (x, 0, 0));
30341 fputc (')', fp);
30342 return TRUE;
30343 }
30344 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30345 {
30346 output_addr_const (fp, XVECEXP (x, 0, 0));
30347 if (GOT_PCREL)
30348 fputs ("+.", fp);
30349 fputs ("-(", fp);
30350 output_addr_const (fp, XVECEXP (x, 0, 1));
30351 fputc (')', fp);
30352 return TRUE;
30353 }
30354 else if (GET_CODE (x) == CONST_VECTOR)
30355 return arm_emit_vector_const (fp, x);
30356
30357 return FALSE;
30358 }
30359
30360 /* Output assembly for a shift instruction.
30361 SET_FLAGS determines how the instruction modifies the condition codes.
30362 0 - Do not set condition codes.
30363 1 - Set condition codes.
30364 2 - Use smallest instruction. */
30365 const char *
30366 arm_output_shift(rtx * operands, int set_flags)
30367 {
30368 char pattern[100];
30369 static const char flag_chars[3] = {'?', '.', '!'};
30370 const char *shift;
30371 HOST_WIDE_INT val;
30372 char c;
30373
30374 c = flag_chars[set_flags];
30375 shift = shift_op(operands[3], &val);
30376 if (shift)
30377 {
30378 if (val != -1)
30379 operands[2] = GEN_INT(val);
30380 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30381 }
30382 else
30383 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30384
30385 output_asm_insn (pattern, operands);
30386 return "";
30387 }
30388
30389 /* Output assembly for a WMMX immediate shift instruction. */
30390 const char *
30391 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30392 {
30393 int shift = INTVAL (operands[2]);
30394 char templ[50];
30395 machine_mode opmode = GET_MODE (operands[0]);
30396
30397 gcc_assert (shift >= 0);
30398
30399 /* If the shift value is larger than the register versions would allow
30400 (> 63 for the D qualifier, > 31 for W or > 15 for H), handle it specially here. */
30401 if (((opmode == V4HImode) && (shift > 15))
30402 || ((opmode == V2SImode) && (shift > 31))
30403 || ((opmode == DImode) && (shift > 63)))
30404 {
30405 if (wror_or_wsra)
30406 {
30407 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30408 output_asm_insn (templ, operands);
30409 if (opmode == DImode)
30410 {
30411 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30412 output_asm_insn (templ, operands);
30413 }
30414 }
30415 else
30416 {
30417 /* The destination register will contain all zeros. */
30418 sprintf (templ, "wzero\t%%0");
30419 output_asm_insn (templ, operands);
30420 }
30421 return "";
30422 }
30423
30424 if ((opmode == DImode) && (shift > 32))
30425 {
30426 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30427 output_asm_insn (templ, operands);
30428 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30429 output_asm_insn (templ, operands);
30430 }
30431 else
30432 {
30433 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30434 output_asm_insn (templ, operands);
30435 }
30436 return "";
30437 }
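/* For instance, a DImode immediate shift of 40 is emitted by the code above
as two instructions, a shift by #32 followed by a shift by #8, since a
single step here only goes up to #32.  A shift amount beyond the lane width
(for example 20 on V4HImode) is emitted as a #32 shift for WROR/WSRA, or as
a wzero of the destination otherwise.  */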
30438
30439 /* Output assembly for a WMMX tinsr instruction. */
30440 const char *
30441 arm_output_iwmmxt_tinsr (rtx *operands)
30442 {
30443 int mask = INTVAL (operands[3]);
30444 int i;
30445 char templ[50];
30446 int units = mode_nunits[GET_MODE (operands[0])];
30447 gcc_assert ((mask & (mask - 1)) == 0);
30448 for (i = 0; i < units; ++i)
30449 {
30450 if ((mask & 0x01) == 1)
30451 {
30452 break;
30453 }
30454 mask >>= 1;
30455 }
30456 gcc_assert (i < units);
30457 {
30458 switch (GET_MODE (operands[0]))
30459 {
30460 case E_V8QImode:
30461 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30462 break;
30463 case E_V4HImode:
30464 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30465 break;
30466 case E_V2SImode:
30467 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30468 break;
30469 default:
30470 gcc_unreachable ();
30471 break;
30472 }
30473 output_asm_insn (templ, operands);
30474 }
30475 return "";
30476 }
30477
30478 /* Output an ARM casesi dispatch sequence. Used by the arm_casesi_internal
30479 insn. Responsible for the handling of switch statements in ARM state. */
30480 const char *
30481 arm_output_casesi (rtx *operands)
30482 {
30483 char label[100];
30484 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30485 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30486 output_asm_insn ("cmp\t%0, %1", operands);
30487 output_asm_insn ("bhi\t%l3", operands);
30488 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30489 switch (GET_MODE (diff_vec))
30490 {
30491 case E_QImode:
30492 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30493 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30494 else
30495 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30496 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30497 break;
30498 case E_HImode:
30499 if (REGNO (operands[4]) != REGNO (operands[5]))
30500 {
30501 output_asm_insn ("add\t%4, %0, %0", operands);
30502 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30503 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30504 else
30505 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30506 }
30507 else
30508 {
30509 output_asm_insn ("add\t%4, %5, %0", operands);
30510 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30511 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30512 else
30513 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30514 }
30515 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30516 break;
30517 case E_SImode:
30518 if (flag_pic)
30519 {
30520 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30521 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30522 }
30523 else
30524 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30525 break;
30526 default:
30527 gcc_unreachable ();
30528 }
30529 assemble_label (asm_out_file, label);
30530 output_asm_insn ("nop", operands);
30531 return "";
30532 }
30533
30534 /* Output a Thumb-1 casesi dispatch sequence. */
30535 const char *
30536 thumb1_output_casesi (rtx *operands)
30537 {
30538 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30539
30540 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30541
30542 switch (GET_MODE(diff_vec))
30543 {
30544 case E_QImode:
30545 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30546 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30547 case E_HImode:
30548 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30549 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30550 case E_SImode:
30551 return "bl\t%___gnu_thumb1_case_si";
30552 default:
30553 gcc_unreachable ();
30554 }
30555 }
30556
30557 /* Output a Thumb-2 casesi instruction. */
30558 const char *
30559 thumb2_output_casesi (rtx *operands)
30560 {
30561 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30562
30563 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30564
30565 output_asm_insn ("cmp\t%0, %1", operands);
30566 output_asm_insn ("bhi\t%l3", operands);
30567 switch (GET_MODE(diff_vec))
30568 {
30569 case E_QImode:
30570 return "tbb\t[%|pc, %0]";
30571 case E_HImode:
30572 return "tbh\t[%|pc, %0, lsl #1]";
30573 case E_SImode:
30574 if (flag_pic)
30575 {
30576 output_asm_insn ("adr\t%4, %l2", operands);
30577 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30578 output_asm_insn ("add\t%4, %4, %5", operands);
30579 return "bx\t%4";
30580 }
30581 else
30582 {
30583 output_asm_insn ("adr\t%4, %l2", operands);
30584 return "ldr\t%|pc, [%4, %0, lsl #2]";
30585 }
30586 default:
30587 gcc_unreachable ();
30588 }
30589 }
30590
30591 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30592 per-core tuning structs. */
30593 static int
30594 arm_issue_rate (void)
30595 {
30596 return current_tune->issue_rate;
30597 }
30598
30599 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30600 static int
30601 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30602 {
30603 if (DEBUG_INSN_P (insn))
30604 return more;
30605
30606 rtx_code code = GET_CODE (PATTERN (insn));
30607 if (code == USE || code == CLOBBER)
30608 return more;
30609
30610 if (get_attr_type (insn) == TYPE_NO_INSN)
30611 return more;
30612
30613 return more - 1;
30614 }
30615
30616 /* Return how many instructions the scheduler should look ahead to choose
30617 the best one. */
30618 static int
30619 arm_first_cycle_multipass_dfa_lookahead (void)
30620 {
30621 int issue_rate = arm_issue_rate ();
30622
30623 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30624 }
30625
30626 /* Enable modeling of L2 auto-prefetcher. */
30627 static int
30628 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30629 {
30630 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30631 }
30632
30633 const char *
30634 arm_mangle_type (const_tree type)
30635 {
30636 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30637 has to be mangled as if it is in the "std" namespace. */
30638 if (TARGET_AAPCS_BASED
30639 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30640 return "St9__va_list";
30641
30642 /* Half-precision floating point types. */
30643 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30644 {
30645 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30646 return NULL;
30647 if (TYPE_MODE (type) == BFmode)
30648 return "u6__bf16";
30649 else
30650 return "Dh";
30651 }
30652
30653 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30654 builtin type. */
30655 if (TYPE_NAME (type) != NULL)
30656 return arm_mangle_builtin_type (type);
30657
30658 /* Use the default mangling. */
30659 return NULL;
30660 }
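/* Mangling examples implied by the checks above: on an AAPCS target,
"__va_list" is mangled as "St9__va_list", __fp16 as "Dh" and __bf16 as
"u6__bf16", while _Float16 (float16_type_node) falls back to the default
mangling.  */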
30661
30662 /* Order of allocation of core registers for Thumb: this allocation is
30663 written over the corresponding initial entries of the array
30664 initialized with REG_ALLOC_ORDER. We allocate all low registers
30665 first. Saving and restoring a low register is usually cheaper than
30666 using a call-clobbered high register. */
30667
30668 static const int thumb_core_reg_alloc_order[] =
30669 {
30670 3, 2, 1, 0, 4, 5, 6, 7,
30671 12, 14, 8, 9, 10, 11
30672 };
30673
30674 /* Adjust register allocation order when compiling for Thumb. */
30675
30676 void
30677 arm_order_regs_for_local_alloc (void)
30678 {
30679 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30680 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30681 if (TARGET_THUMB)
30682 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30683 sizeof (thumb_core_reg_alloc_order));
30684 }
30685
30686 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30687
30688 bool
30689 arm_frame_pointer_required (void)
30690 {
30691 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30692 return true;
30693
30694 /* If the function receives nonlocal gotos, it needs to save the frame
30695 pointer in the nonlocal_goto_save_area object. */
30696 if (cfun->has_nonlocal_label)
30697 return true;
30698
30699 /* The frame pointer is required for non-leaf APCS frames. */
30700 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30701 return true;
30702
30703 /* If we are probing the stack in the prologue, we will have a faulting
30704 instruction prior to the stack adjustment and this requires a frame
30705 pointer if we want to catch the exception using the EABI unwinder. */
30706 if (!IS_INTERRUPT (arm_current_func_type ())
30707 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30708 || flag_stack_clash_protection)
30709 && arm_except_unwind_info (&global_options) == UI_TARGET
30710 && cfun->can_throw_non_call_exceptions)
30711 {
30712 HOST_WIDE_INT size = get_frame_size ();
30713
30714 /* That's irrelevant if there is no stack adjustment. */
30715 if (size <= 0)
30716 return false;
30717
30718 /* That's relevant only if there is a stack probe. */
30719 if (crtl->is_leaf && !cfun->calls_alloca)
30720 {
30721 /* We don't have the final size of the frame so adjust. */
30722 size += 32 * UNITS_PER_WORD;
30723 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30724 return true;
30725 }
30726 else
30727 return true;
30728 }
30729
30730 return false;
30731 }
30732
30733 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30734 All modes except THUMB1 have conditional execution.
30735 If we have conditional arithmetic, return false before reload to
30736 enable some ifcvt transformations. */
30737 static bool
30738 arm_have_conditional_execution (void)
30739 {
30740 bool has_cond_exec, enable_ifcvt_trans;
30741
30742 /* Only THUMB1 cannot support conditional execution. */
30743 has_cond_exec = !TARGET_THUMB1;
30744
30745 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30746 before reload. */
30747 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30748
30749 return has_cond_exec && !enable_ifcvt_trans;
30750 }
30751
30752 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30753 static HOST_WIDE_INT
30754 arm_vector_alignment (const_tree type)
30755 {
30756 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30757
30758 if (TARGET_AAPCS_BASED)
30759 align = MIN (align, 64);
30760
30761 return align;
30762 }
30763
30764 static unsigned int
30765 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30766 {
30767 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30768 {
30769 modes->safe_push (V16QImode);
30770 modes->safe_push (V8QImode);
30771 }
30772 return 0;
30773 }
30774
30775 static bool
30776 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30777 {
30778 /* Vectors which aren't in packed structures will not be less aligned than
30779 the natural alignment of their element type, so this is safe. */
30780 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30781 return !is_packed;
30782
30783 return default_builtin_vector_alignment_reachable (type, is_packed);
30784 }
30785
30786 static bool
30787 arm_builtin_support_vector_misalignment (machine_mode mode,
30788 const_tree type, int misalignment,
30789 bool is_packed)
30790 {
30791 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30792 {
30793 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30794
30795 if (is_packed)
30796 return align == 1;
30797
30798 /* If the misalignment is unknown, we should be able to handle the access
30799 so long as it is not to a member of a packed data structure. */
30800 if (misalignment == -1)
30801 return true;
30802
30803 /* Return true if the misalignment is a multiple of the natural alignment
30804 of the vector's element type. This is probably always going to be
30805 true in practice, since we've already established that this isn't a
30806 packed access. */
30807 return ((misalignment % align) == 0);
30808 }
30809
30810 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30811 is_packed);
30812 }
30813
30814 static void
30815 arm_conditional_register_usage (void)
30816 {
30817 int regno;
30818
30819 if (TARGET_THUMB1 && optimize_size)
30820 {
30821 /* When optimizing for size on Thumb-1, it's better not
30822 to use the HI regs, because of the overhead of
30823 stacking them. */
30824 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30825 fixed_regs[regno] = call_used_regs[regno] = 1;
30826 }
30827
30828 /* The link register can be clobbered by any branch insn,
30829 but we have no way to track that at present, so mark
30830 it as unavailable. */
30831 if (TARGET_THUMB1)
30832 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30833
30834 if (TARGET_32BIT && TARGET_VFP_BASE)
30835 {
30836 /* VFPv3 registers are disabled when earlier VFP
30837 versions are selected due to the definition of
30838 LAST_VFP_REGNUM. */
30839 for (regno = FIRST_VFP_REGNUM;
30840 regno <= LAST_VFP_REGNUM; ++ regno)
30841 {
30842 fixed_regs[regno] = 0;
30843 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30844 || regno >= FIRST_VFP_REGNUM + 32;
30845 }
30846 if (TARGET_HAVE_MVE)
30847 fixed_regs[VPR_REGNUM] = 0;
30848 }
30849
30850 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30851 {
30852 regno = FIRST_IWMMXT_GR_REGNUM;
30853 /* The 2002/10/09 revision of the XScale ABI has wCG0
30854 and wCG1 as call-preserved registers. The 2002/11/21
30855 revision changed this so that all wCG registers are
30856 scratch registers. */
30857 for (regno = FIRST_IWMMXT_GR_REGNUM;
30858 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30859 fixed_regs[regno] = 0;
30860 /* The XScale ABI has wR0 - wR9 as scratch registers,
30861 the rest as call-preserved registers. */
30862 for (regno = FIRST_IWMMXT_REGNUM;
30863 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30864 {
30865 fixed_regs[regno] = 0;
30866 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30867 }
30868 }
30869
30870 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30871 {
30872 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30873 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30874 }
30875 else if (TARGET_APCS_STACK)
30876 {
30877 fixed_regs[10] = 1;
30878 call_used_regs[10] = 1;
30879 }
30880 /* -mcaller-super-interworking reserves r11 for calls to
30881 _interwork_r11_call_via_rN(). Making the register global
30882 is an easy way of ensuring that it remains valid for all
30883 calls. */
30884 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30885 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30886 {
30887 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30888 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30889 if (TARGET_CALLER_INTERWORKING)
30890 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30891 }
30892
30893 /* The Q and GE bits are only accessed via special ACLE patterns. */
30894 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30895 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30896
30897 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30898 }
30899
30900 static reg_class_t
30901 arm_preferred_rename_class (reg_class_t rclass)
30902 {
30903 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30904 using GENERAL_REGS. During the register rename pass we therefore prefer
30905 LO_REGS, which can reduce code size. */
30906 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30907 return LO_REGS;
30908 else
30909 return NO_REGS;
30910 }
30911
30912 /* Compute the attribute "length" of insn "*push_multi".
30913 So this function MUST be kept in sync with that insn pattern. */
30914 int
30915 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30916 {
30917 int i, regno, hi_reg;
30918 int num_saves = XVECLEN (parallel_op, 0);
30919
30920 /* ARM mode. */
30921 if (TARGET_ARM)
30922 return 4;
30923 /* Thumb1 mode. */
30924 if (TARGET_THUMB1)
30925 return 2;
30926
30927 /* Thumb2 mode. */
30928 regno = REGNO (first_op);
30929 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30930 list fits in the 8-bit register list field. Normally this means all registers
30931 in the list must be LO_REGS, that is R0-R7. If any HI_REGS are used, then we
30932 must use the 32-bit encoding. The one exception is PUSH, where LR (a HI_REG)
30933 can also be used with the 16-bit encoding. */
30934 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30935 for (i = 1; i < num_saves && !hi_reg; i++)
30936 {
30937 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30938 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30939 }
30940
30941 if (!hi_reg)
30942 return 2;
30943 return 4;
30944 }
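/* As a rough worked example of the rule above: under Thumb-2,
"push {r0-r7, lr}" names only low registers plus LR, so the 16-bit encoding
applies and the length is 2, whereas "push {r0, r8}" names a high register
other than LR and therefore needs the 32-bit encoding, length 4.  In ARM
state every such store multiple is a single 4-byte instruction.  */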
30945
30946 /* Compute the attribute "length" of insn. Currently, this function is used
30947 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30948 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30949 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30950 true if OPERANDS contains an insn which explicitly updates the base register. */
30951
30952 int
30953 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30954 {
30955 /* ARM mode. */
30956 if (TARGET_ARM)
30957 return 4;
30958 /* Thumb1 mode. */
30959 if (TARGET_THUMB1)
30960 return 2;
30961
30962 rtx parallel_op = operands[0];
30963 /* Initialize to the number of elements in the PARALLEL. */
30964 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30965 /* Initialize to the base register. */
30966 unsigned regno = REGNO (operands[1]);
30967 /* Skip the return and write-back patterns; we only need the register
30968 pop patterns for the analysis below. */
30969 unsigned first_indx = 0;
30970 first_indx += return_pc ? 1 : 0;
30971 first_indx += write_back_p ? 1 : 0;
30972
30973 /* A pop operation can be done through LDM or POP. If the base register is SP
30974 and write back is used, then LDM is an alias of POP. */
30975 bool pop_p = (regno == SP_REGNUM && write_back_p);
30976 bool ldm_p = !pop_p;
30977
30978 /* Check base register for LDM. */
30979 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30980 return 4;
30981
30982 /* Check each register in the list. */
30983 for (; indx >= first_indx; indx--)
30984 {
30985 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30986 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30987 comment in arm_attr_length_push_multi. */
30988 if (REGNO_REG_CLASS (regno) == HI_REGS
30989 && (regno != PC_REGNUM || ldm_p))
30990 return 4;
30991 }
30992
30993 return 2;
30994 }
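/* Worked examples of the length computation above, under Thumb-2: a pop
through SP with write back such as "pop {r0-r3, pc}" may keep PC in the
16-bit POP encoding, so its length is 2, while "ldmia r8!, {r0, r1}" uses a
high base register and therefore needs the 32-bit encoding, length 4.  */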
30995
30996 /* Compute the number of instructions emitted by output_move_double. */
30997 int
30998 arm_count_output_move_double_insns (rtx *operands)
30999 {
31000 int count;
31001 rtx ops[2];
31002 /* output_move_double may modify the operands array, so call it
31003 here on a copy of the array. */
31004 ops[0] = operands[0];
31005 ops[1] = operands[1];
31006 output_move_double (ops, false, &count);
31007 return count;
31008 }
31009
31010 /* Same as above, but operands are a register/memory pair in SImode.
31011 Assumes operands has the base register in position 0 and memory in position
31012 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31013 int
31014 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31015 {
31016 int count;
31017 rtx ops[2];
31018 int regnum, memnum;
31019 if (load)
31020 regnum = 0, memnum = 1;
31021 else
31022 regnum = 1, memnum = 0;
31023 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31024 ops[memnum] = adjust_address (operands[2], DImode, 0);
31025 output_move_double (ops, false, &count);
31026 return count;
31027 }
31028
31029
31030 int
31031 vfp3_const_double_for_fract_bits (rtx operand)
31032 {
31033 REAL_VALUE_TYPE r0;
31034
31035 if (!CONST_DOUBLE_P (operand))
31036 return 0;
31037
31038 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31039 if (exact_real_inverse (DFmode, &r0)
31040 && !REAL_VALUE_NEGATIVE (r0))
31041 {
31042 if (exact_real_truncate (DFmode, &r0))
31043 {
31044 HOST_WIDE_INT value = real_to_integer (&r0);
31045 value = value & 0xffffffff;
31046 if ((value != 0) && ( (value & (value - 1)) == 0))
31047 {
31048 int ret = exact_log2 (value);
31049 gcc_assert (IN_RANGE (ret, 0, 31));
31050 return ret;
31051 }
31052 }
31053 }
31054 return 0;
31055 }
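/* For example, a CONST_DOUBLE of 0.125 has the exact reciprocal 8.0, which
is 2^3, so the function above returns 3, the number of fraction bits for a
fixed-point vcvt.  A value such as 0.3, whose reciprocal is not an exact
power of two, returns 0.  */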
31056
31057 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31058 log2 is in [1, 32], return that log2. Otherwise return -1.
31059 This is used in the patterns for vcvt.s32.f32 floating-point to
31060 fixed-point conversions. */
31061
31062 int
31063 vfp3_const_double_for_bits (rtx x)
31064 {
31065 const REAL_VALUE_TYPE *r;
31066
31067 if (!CONST_DOUBLE_P (x))
31068 return -1;
31069
31070 r = CONST_DOUBLE_REAL_VALUE (x);
31071
31072 if (REAL_VALUE_NEGATIVE (*r)
31073 || REAL_VALUE_ISNAN (*r)
31074 || REAL_VALUE_ISINF (*r)
31075 || !real_isinteger (r, SFmode))
31076 return -1;
31077
31078 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31079
31080 /* The exact_log2 above will have returned -1 if this is
31081 not an exact log2. */
31082 if (!IN_RANGE (hwint, 1, 32))
31083 return -1;
31084
31085 return hwint;
31086 }
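/* For example, 65536.0 is 2^16, so 16 is returned here, whereas 1.0 gives a
log2 of 0, which is outside the [1, 32] range and so yields -1, as does any
value that is negative, NaN, infinite or not an exact power of two.  */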
31087
31088 \f
31089 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31090
31091 static void
31092 arm_pre_atomic_barrier (enum memmodel model)
31093 {
31094 if (need_atomic_barrier_p (model, true))
31095 emit_insn (gen_memory_barrier ());
31096 }
31097
31098 static void
31099 arm_post_atomic_barrier (enum memmodel model)
31100 {
31101 if (need_atomic_barrier_p (model, false))
31102 emit_insn (gen_memory_barrier ());
31103 }
31104
31105 /* Emit the load-exclusive and store-exclusive instructions.
31106 Use acquire and release versions if necessary. */
31107
31108 static void
31109 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31110 {
31111 rtx (*gen) (rtx, rtx);
31112
31113 if (acq)
31114 {
31115 switch (mode)
31116 {
31117 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31118 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31119 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31120 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31121 default:
31122 gcc_unreachable ();
31123 }
31124 }
31125 else
31126 {
31127 switch (mode)
31128 {
31129 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31130 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31131 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31132 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31133 default:
31134 gcc_unreachable ();
31135 }
31136 }
31137
31138 emit_insn (gen (rval, mem));
31139 }
31140
31141 static void
31142 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31143 rtx mem, bool rel)
31144 {
31145 rtx (*gen) (rtx, rtx, rtx);
31146
31147 if (rel)
31148 {
31149 switch (mode)
31150 {
31151 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31152 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31153 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31154 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31155 default:
31156 gcc_unreachable ();
31157 }
31158 }
31159 else
31160 {
31161 switch (mode)
31162 {
31163 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31164 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31165 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31166 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31167 default:
31168 gcc_unreachable ();
31169 }
31170 }
31171
31172 emit_insn (gen (bval, rval, mem));
31173 }
31174
31175 /* Mark the previous jump instruction as unlikely. */
31176
31177 static void
31178 emit_unlikely_jump (rtx insn)
31179 {
31180 rtx_insn *jump = emit_jump_insn (insn);
31181 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31182 }
31183
31184 /* Expand a compare and swap pattern. */
31185
31186 void
31187 arm_expand_compare_and_swap (rtx operands[])
31188 {
31189 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31190 machine_mode mode, cmp_mode;
31191
31192 bval = operands[0];
31193 rval = operands[1];
31194 mem = operands[2];
31195 oldval = operands[3];
31196 newval = operands[4];
31197 is_weak = operands[5];
31198 mod_s = operands[6];
31199 mod_f = operands[7];
31200 mode = GET_MODE (mem);
31201
31202 /* Normally the succ memory model must be stronger than fail, but in the
31203 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31204 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31205
31206 if (TARGET_HAVE_LDACQ
31207 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31208 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31209 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31210
31211 switch (mode)
31212 {
31213 case E_QImode:
31214 case E_HImode:
31215 /* For narrow modes, we're going to perform the comparison in SImode,
31216 so do the zero-extension now. */
31217 rval = gen_reg_rtx (SImode);
31218 oldval = convert_modes (SImode, mode, oldval, true);
31219 /* FALLTHRU */
31220
31221 case E_SImode:
31222 /* Force the value into a register if needed. We waited until after
31223 the zero-extension above to do this properly. */
31224 if (!arm_add_operand (oldval, SImode))
31225 oldval = force_reg (SImode, oldval);
31226 break;
31227
31228 case E_DImode:
31229 if (!cmpdi_operand (oldval, mode))
31230 oldval = force_reg (mode, oldval);
31231 break;
31232
31233 default:
31234 gcc_unreachable ();
31235 }
31236
31237 if (TARGET_THUMB1)
31238 cmp_mode = E_SImode;
31239 else
31240 cmp_mode = CC_Zmode;
31241
31242 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31243 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31244 oldval, newval, is_weak, mod_s, mod_f));
31245
31246 if (mode == QImode || mode == HImode)
31247 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31248
31249 /* In all cases, we arrange for success to be signaled by Z set.
31250 This arrangement allows for the boolean result to be used directly
31251 in a subsequent branch, post optimization. For Thumb-1 targets, the
31252 boolean negation of the result is also stored in bval, because the Thumb-1
31253 backend lacks dependency tracking for the CC flag: flag setting is not
31254 represented at the RTL level. */
31255 if (TARGET_THUMB1)
31256 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31257 else
31258 {
31259 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31260 emit_insn (gen_rtx_SET (bval, x));
31261 }
31262 }
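/* Concretely, the promotion above means that a call along the lines of
__atomic_compare_exchange_n (p, &expected, desired, 0, __ATOMIC_RELEASE,
__ATOMIC_ACQUIRE) is expanded on a target with LDA/STL as if the success
order were ACQ_REL, so the acquire semantics of the failure path are not
lost.  */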
31263
31264 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31265 another memory store between the load-exclusive and store-exclusive can
31266 reset the monitor from Exclusive to Open state. This means we must wait
31267 until after reload to split the pattern, lest we get a register spill in
31268 the middle of the atomic sequence. Success of the compare and swap is
31269 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31270 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
31271 the atomic_compare_and_swapmode standard pattern in operand 0). */
31272
31273 void
31274 arm_split_compare_and_swap (rtx operands[])
31275 {
31276 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31277 machine_mode mode;
31278 enum memmodel mod_s, mod_f;
31279 bool is_weak;
31280 rtx_code_label *label1, *label2;
31281 rtx x, cond;
31282
31283 rval = operands[1];
31284 mem = operands[2];
31285 oldval = operands[3];
31286 newval = operands[4];
31287 is_weak = (operands[5] != const0_rtx);
31288 mod_s_rtx = operands[6];
31289 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31290 mod_f = memmodel_from_int (INTVAL (operands[7]));
31291 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31292 mode = GET_MODE (mem);
31293
31294 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31295
31296 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31297 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31298
31299 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31300 a full barrier is emitted after the store-release. */
31301 if (is_armv8_sync)
31302 use_acquire = false;
31303
31304 /* Checks whether a barrier is needed and emits one accordingly. */
31305 if (!(use_acquire || use_release))
31306 arm_pre_atomic_barrier (mod_s);
31307
31308 label1 = NULL;
31309 if (!is_weak)
31310 {
31311 label1 = gen_label_rtx ();
31312 emit_label (label1);
31313 }
31314 label2 = gen_label_rtx ();
31315
31316 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31317
31318 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31319 as required to communicate with arm_expand_compare_and_swap. */
31320 if (TARGET_32BIT)
31321 {
31322 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31323 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31324 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31325 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31326 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31327 }
31328 else
31329 {
31330 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31331 if (thumb1_cmpneg_operand (oldval, SImode))
31332 {
31333 rtx src = rval;
31334 if (!satisfies_constraint_L (oldval))
31335 {
31336 gcc_assert (satisfies_constraint_J (oldval));
31337
31338 /* For such immediates, ADDS needs the source and destination regs
31339 to be the same.
31340
31341 Normally this would be handled by RA, but this is all happening
31342 after RA. */
31343 emit_move_insn (neg_bval, rval);
31344 src = neg_bval;
31345 }
31346
31347 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31348 label2, cond));
31349 }
31350 else
31351 {
31352 emit_move_insn (neg_bval, const1_rtx);
31353 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31354 }
31355 }
31356
31357 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31358
31359 /* Weak or strong, we want EQ to be true for success, so that we
31360 match the flags that we got from the compare above. */
31361 if (TARGET_32BIT)
31362 {
31363 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31364 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31365 emit_insn (gen_rtx_SET (cond, x));
31366 }
31367
31368 if (!is_weak)
31369 {
31370 /* Z is set to boolean value of !neg_bval, as required to communicate
31371 with arm_expand_compare_and_swap. */
31372 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31373 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31374 }
31375
31376 if (!is_mm_relaxed (mod_f))
31377 emit_label (label2);
31378
31379 /* Checks whether a barrier is needed and emits one accordingly. */
31380 if (is_armv8_sync
31381 || !(use_acquire || use_release))
31382 arm_post_atomic_barrier (mod_s);
31383
31384 if (is_mm_relaxed (mod_f))
31385 emit_label (label2);
31386 }
31387
31388 /* Split an atomic operation pattern. Operation is given by CODE and is one
31389 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31390 operation). Operation is performed on the content at MEM and on VALUE
31391 following the memory model MODEL_RTX. The content at MEM before and after
31392 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31393 success of the operation is returned in COND. Using a scratch register or
31394 an operand register for these determines what result is returned for that
31395 pattern. */
31396
31397 void
31398 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31399 rtx value, rtx model_rtx, rtx cond)
31400 {
31401 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31402 machine_mode mode = GET_MODE (mem);
31403 machine_mode wmode = (mode == DImode ? DImode : SImode);
31404 rtx_code_label *label;
31405 bool all_low_regs, bind_old_new;
31406 rtx x;
31407
31408 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31409
31410 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31411 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31412
31413 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31414 a full barrier is emitted after the store-release. */
31415 if (is_armv8_sync)
31416 use_acquire = false;
31417
31418 /* Checks whether a barrier is needed and emits one accordingly. */
31419 if (!(use_acquire || use_release))
31420 arm_pre_atomic_barrier (model);
31421
31422 label = gen_label_rtx ();
31423 emit_label (label);
31424
31425 if (new_out)
31426 new_out = gen_lowpart (wmode, new_out);
31427 if (old_out)
31428 old_out = gen_lowpart (wmode, old_out);
31429 else
31430 old_out = new_out;
31431 value = simplify_gen_subreg (wmode, value, mode, 0);
31432
31433 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31434
31435 /* Does the operation require the destination and first operand to use the
31436 same register? This is decided by the register constraints of the relevant
31437 insn patterns in thumb1.md. */
31438 gcc_assert (!new_out || REG_P (new_out));
31439 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31440 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31441 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31442 bind_old_new =
31443 (TARGET_THUMB1
31444 && code != SET
31445 && code != MINUS
31446 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31447
31448 /* We want to return the old value while putting the result of the operation
31449 in the same register as the old value so copy the old value over to the
31450 destination register and use that register for the operation. */
31451 if (old_out && bind_old_new)
31452 {
31453 emit_move_insn (new_out, old_out);
31454 old_out = new_out;
31455 }
31456
31457 switch (code)
31458 {
31459 case SET:
31460 new_out = value;
31461 break;
31462
31463 case NOT:
31464 x = gen_rtx_AND (wmode, old_out, value);
31465 emit_insn (gen_rtx_SET (new_out, x));
31466 x = gen_rtx_NOT (wmode, new_out);
31467 emit_insn (gen_rtx_SET (new_out, x));
31468 break;
31469
31470 case MINUS:
31471 if (CONST_INT_P (value))
31472 {
31473 value = gen_int_mode (-INTVAL (value), wmode);
31474 code = PLUS;
31475 }
31476 /* FALLTHRU */
31477
31478 case PLUS:
31479 if (mode == DImode)
31480 {
31481 /* DImode plus/minus need to clobber flags. */
31482 /* The adddi3 and subdi3 patterns are incorrectly written so that
31483 they require matching operands, even when we could easily support
31484 three operands. Thankfully, this can be fixed up post-splitting,
31485 as the individual add+adc patterns do accept three operands and
31486 post-reload cprop can make these moves go away. */
31487 emit_move_insn (new_out, old_out);
31488 if (code == PLUS)
31489 x = gen_adddi3 (new_out, new_out, value);
31490 else
31491 x = gen_subdi3 (new_out, new_out, value);
31492 emit_insn (x);
31493 break;
31494 }
31495 /* FALLTHRU */
31496
31497 default:
31498 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31499 emit_insn (gen_rtx_SET (new_out, x));
31500 break;
31501 }
31502
31503 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31504 use_release);
31505
31506 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31507 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31508
31509 /* Checks whether a barrier is needed and emits one accordingly. */
31510 if (is_armv8_sync
31511 || !(use_acquire || use_release))
31512 arm_post_atomic_barrier (model);
31513 }
31514 \f
31515 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31516 opt_machine_mode
31517 arm_mode_to_pred_mode (machine_mode mode)
31518 {
31519 switch (GET_MODE_NUNITS (mode))
31520 {
31521 case 16: return V16BImode;
31522 case 8: return V8BImode;
31523 case 4: return V4BImode;
31524 case 2: return V2QImode;
31525 }
31526 return opt_machine_mode ();
31527 }
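/* For example, V16QImode (16 lanes) maps to V16BImode, V8HImode to
V8BImode, V4SImode or V4SFmode to V4BImode and the 2-lane modes to
V2QImode; any other lane count yields an empty opt_machine_mode.  */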
31528
31529 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31530 If CAN_INVERT, store either the result or its inverse in TARGET
31531 and return true if TARGET contains the inverse. If !CAN_INVERT,
31532 always store the result in TARGET, never its inverse.
31533
31534 Note that the handling of floating-point comparisons is not
31535 IEEE compliant. */
31536
31537 bool
31538 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31539 bool can_invert)
31540 {
31541 machine_mode cmp_result_mode = GET_MODE (target);
31542 machine_mode cmp_mode = GET_MODE (op0);
31543
31544 bool inverted;
31545
31546 /* MVE supports more comparisons than Neon. */
31547 if (TARGET_HAVE_MVE)
31548 inverted = false;
31549 else
31550 switch (code)
31551 {
31552 /* For these we need to compute the inverse of the requested
31553 comparison. */
31554 case UNORDERED:
31555 case UNLT:
31556 case UNLE:
31557 case UNGT:
31558 case UNGE:
31559 case UNEQ:
31560 case NE:
31561 code = reverse_condition_maybe_unordered (code);
31562 if (!can_invert)
31563 {
31564 /* Recursively emit the inverted comparison into a temporary
31565 and then store its inverse in TARGET. This avoids reusing
31566 TARGET (which for integer NE could be one of the inputs). */
31567 rtx tmp = gen_reg_rtx (cmp_result_mode);
31568 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31569 gcc_unreachable ();
31570 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31571 return false;
31572 }
31573 inverted = true;
31574 break;
31575
31576 default:
31577 inverted = false;
31578 break;
31579 }
31580
31581 switch (code)
31582 {
31583 /* These are natively supported by Neon for zero comparisons, but otherwise
31584 require the operands to be swapped. For MVE, we can only compare
31585 registers. */
31586 case LE:
31587 case LT:
31588 if (!TARGET_HAVE_MVE)
31589 if (op1 != CONST0_RTX (cmp_mode))
31590 {
31591 code = swap_condition (code);
31592 std::swap (op0, op1);
31593 }
31594 /* Fall through. */
31595
31596 /* These are natively supported by Neon for both register and zero
31597 operands. MVE supports registers only. */
31598 case EQ:
31599 case GE:
31600 case GT:
31601 case NE:
31602 if (TARGET_HAVE_MVE)
31603 {
31604 switch (GET_MODE_CLASS (cmp_mode))
31605 {
31606 case MODE_VECTOR_INT:
31607 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31608 op0, force_reg (cmp_mode, op1)));
31609 break;
31610 case MODE_VECTOR_FLOAT:
31611 if (TARGET_HAVE_MVE_FLOAT)
31612 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31613 op0, force_reg (cmp_mode, op1)));
31614 else
31615 gcc_unreachable ();
31616 break;
31617 default:
31618 gcc_unreachable ();
31619 }
31620 }
31621 else
31622 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31623 return inverted;
31624
31625 /* These are natively supported for register operands only.
31626 Comparisons with zero aren't useful and should be folded
31627 or canonicalized by target-independent code. */
31628 case GEU:
31629 case GTU:
31630 if (TARGET_HAVE_MVE)
31631 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31632 op0, force_reg (cmp_mode, op1)));
31633 else
31634 emit_insn (gen_neon_vc (code, cmp_mode, target,
31635 op0, force_reg (cmp_mode, op1)));
31636 return inverted;
31637
31638 /* These require the operands to be swapped and likewise do not
31639 support comparisons with zero. */
31640 case LEU:
31641 case LTU:
31642 if (TARGET_HAVE_MVE)
31643 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31644 force_reg (cmp_mode, op1), op0));
31645 else
31646 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31647 target, force_reg (cmp_mode, op1), op0));
31648 return inverted;
31649
31650 /* These need a combination of two comparisons. */
31651 case LTGT:
31652 case ORDERED:
31653 {
31654 /* Operands are LTGT iff (a > b || a < b).
31655 Operands are ORDERED iff (a > b || a <= b). */
31656 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31657 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31658 rtx_code alt_code = (code == LTGT ? LT : LE);
31659 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31660 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31661 gcc_unreachable ();
31662 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31663 gt_res, alt_res)));
31664 return inverted;
31665 }
31666
31667 default:
31668 gcc_unreachable ();
31669 }
31670 }
31671
31672 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31673 CMP_RESULT_MODE is the mode of the comparison result. */
31674
31675 void
31676 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31677 {
31678 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31679 arm_expand_vector_compare, and another one here. */
31680 rtx mask;
31681
31682 if (TARGET_HAVE_MVE)
31683 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31684 else
31685 mask = gen_reg_rtx (cmp_result_mode);
31686
31687 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31688 operands[4], operands[5], true);
31689 if (inverted)
31690 std::swap (operands[1], operands[2]);
31691 if (TARGET_NEON)
31692 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31693 mask, operands[1], operands[2]));
31694 else
31695 {
31696 machine_mode cmp_mode = GET_MODE (operands[0]);
31697
31698 switch (GET_MODE_CLASS (cmp_mode))
31699 {
31700 case MODE_VECTOR_INT:
31701 emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
31702 operands[1], operands[2], mask));
31703 break;
31704 case MODE_VECTOR_FLOAT:
31705 if (TARGET_HAVE_MVE_FLOAT)
31706 emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
31707 operands[1], operands[2], mask));
31708 else
31709 gcc_unreachable ();
31710 break;
31711 default:
31712 gcc_unreachable ();
31713 }
31714 }
31715 }
31716 \f
31717 #define MAX_VECT_LEN 16
31718
31719 struct expand_vec_perm_d
31720 {
31721 rtx target, op0, op1;
31722 vec_perm_indices perm;
31723 machine_mode vmode;
31724 bool one_vector_p;
31725 bool testing_p;
31726 };
31727
31728 /* Generate a variable permutation. */
31729
31730 static void
31731 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31732 {
31733 machine_mode vmode = GET_MODE (target);
31734 bool one_vector_p = rtx_equal_p (op0, op1);
31735
31736 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31737 gcc_checking_assert (GET_MODE (op0) == vmode);
31738 gcc_checking_assert (GET_MODE (op1) == vmode);
31739 gcc_checking_assert (GET_MODE (sel) == vmode);
31740 gcc_checking_assert (TARGET_NEON);
31741
31742 if (one_vector_p)
31743 {
31744 if (vmode == V8QImode)
31745 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31746 else
31747 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31748 }
31749 else
31750 {
31751 rtx pair;
31752
31753 if (vmode == V8QImode)
31754 {
31755 pair = gen_reg_rtx (V16QImode);
31756 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31757 pair = gen_lowpart (TImode, pair);
31758 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31759 }
31760 else
31761 {
31762 pair = gen_reg_rtx (OImode);
31763 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31764 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31765 }
31766 }
31767 }
31768
31769 void
31770 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31771 {
31772 machine_mode vmode = GET_MODE (target);
31773 unsigned int nelt = GET_MODE_NUNITS (vmode);
31774 bool one_vector_p = rtx_equal_p (op0, op1);
31775 rtx mask;
31776
31777 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31778 numbering of elements for big-endian, we must reverse the order. */
31779 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31780
31781 /* The VTBL instruction does not use a modulo index, so we must take care
31782 of that ourselves. */
31783 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31784 mask = gen_const_vec_duplicate (vmode, mask);
31785 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31786
31787 arm_expand_vec_perm_1 (target, op0, op1, sel);
31788 }
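/* For example, when permuting a single V8QImode vector (nelt == 8) the
selector is first ANDed with a vector of 7s, so an index of 9 behaves like
index 1.  This gives the modulo semantics VEC_PERM_EXPR requires, which
VTBL itself does not provide (out-of-range VTBL indexes read as zero).  */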
31789
31790 /* Map lane ordering between architectural lane order, and GCC lane order,
31791 taking into account ABI. See comment above output_move_neon for details. */
31792
31793 static int
31794 neon_endian_lane_map (machine_mode mode, int lane)
31795 {
31796 if (BYTES_BIG_ENDIAN)
31797 {
31798 int nelems = GET_MODE_NUNITS (mode);
31799 /* Reverse lane order. */
31800 lane = (nelems - 1 - lane);
31801 /* Reverse D register order, to match ABI. */
31802 if (GET_MODE_SIZE (mode) == 16)
31803 lane = lane ^ (nelems / 2);
31804 }
31805 return lane;
31806 }
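/* A quick sanity check of the mapping above: for big-endian V4SImode
(4 lanes, 16 bytes), architectural lane 0 maps to GCC lane (3 - 0) ^ 2 = 1,
lane 1 to 0, lane 2 to 3 and lane 3 to 2, i.e. the lanes are reversed within
each D register while the D registers themselves stay in ABI order.  On
little-endian targets the lane number is returned unchanged.  */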
31807
31808 /* Some permutations index into pairs of vectors, this is a helper function
31809 to map indexes into those pairs of vectors. */
31810
31811 static int
31812 neon_pair_endian_lane_map (machine_mode mode, int lane)
31813 {
31814 int nelem = GET_MODE_NUNITS (mode);
31815 if (BYTES_BIG_ENDIAN)
31816 lane =
31817 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31818 return lane;
31819 }
31820
31821 /* Generate or test for an insn that supports a constant permutation. */
31822
31823 /* Recognize patterns for the VUZP insns. */
31824
31825 static bool
31826 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31827 {
31828 unsigned int i, odd, mask, nelt = d->perm.length ();
31829 rtx out0, out1, in0, in1;
31830 int first_elem;
31831 int swap_nelt;
31832
31833 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31834 return false;
31835
31836 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31837 big endian pattern on 64 bit vectors, so we correct for that. */
31838 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31839 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31840
31841 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31842
31843 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31844 odd = 0;
31845 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31846 odd = 1;
31847 else
31848 return false;
31849 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31850
31851 for (i = 0; i < nelt; i++)
31852 {
31853 unsigned elt =
31854 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31855 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31856 return false;
31857 }
31858
31859 /* Success! */
31860 if (d->testing_p)
31861 return true;
31862
31863 in0 = d->op0;
31864 in1 = d->op1;
31865 if (swap_nelt != 0)
31866 std::swap (in0, in1);
31867
31868 out0 = d->target;
31869 out1 = gen_reg_rtx (d->vmode);
31870 if (odd)
31871 std::swap (out0, out1);
31872
31873 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31874 return true;
31875 }
31876
31877 /* Recognize patterns for the VZIP insns. */
31878
31879 static bool
31880 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31881 {
31882 unsigned int i, high, mask, nelt = d->perm.length ();
31883 rtx out0, out1, in0, in1;
31884 int first_elem;
31885 bool is_swapped;
31886
31887 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31888 return false;
31889
31890 is_swapped = BYTES_BIG_ENDIAN;
31891
31892 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31893
31894 high = nelt / 2;
31895 if (first_elem == neon_endian_lane_map (d->vmode, high))
31896 ;
31897 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31898 high = 0;
31899 else
31900 return false;
31901 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31902
31903 for (i = 0; i < nelt / 2; i++)
31904 {
31905 unsigned elt =
31906 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31907 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31908 != elt)
31909 return false;
31910 elt =
31911 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31912 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31913 != elt)
31914 return false;
31915 }
31916
31917 /* Success! */
31918 if (d->testing_p)
31919 return true;
31920
31921 in0 = d->op0;
31922 in1 = d->op1;
31923 if (is_swapped)
31924 std::swap (in0, in1);
31925
31926 out0 = d->target;
31927 out1 = gen_reg_rtx (d->vmode);
31928 if (high)
31929 std::swap (out0, out1);
31930
31931 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31932 return true;
31933 }
31934
31935 /* Recognize patterns for the VREV insns. */
31936 static bool
31937 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31938 {
31939 unsigned int i, j, diff, nelt = d->perm.length ();
31940 rtx (*gen) (machine_mode, rtx, rtx);
31941
31942 if (!d->one_vector_p)
31943 return false;
31944
31945 diff = d->perm[0];
31946 switch (diff)
31947 {
31948 case 7:
31949 switch (d->vmode)
31950 {
31951 case E_V16QImode:
31952 case E_V8QImode:
31953 gen = gen_neon_vrev64;
31954 break;
31955 default:
31956 return false;
31957 }
31958 break;
31959 case 3:
31960 switch (d->vmode)
31961 {
31962 case E_V16QImode:
31963 case E_V8QImode:
31964 gen = gen_neon_vrev32;
31965 break;
31966 case E_V8HImode:
31967 case E_V4HImode:
31968 case E_V8HFmode:
31969 case E_V4HFmode:
31970 gen = gen_neon_vrev64;
31971 break;
31972 default:
31973 return false;
31974 }
31975 break;
31976 case 1:
31977 switch (d->vmode)
31978 {
31979 case E_V16QImode:
31980 case E_V8QImode:
31981 gen = gen_neon_vrev16;
31982 break;
31983 case E_V8HImode:
31984 case E_V4HImode:
31985 gen = gen_neon_vrev32;
31986 break;
31987 case E_V4SImode:
31988 case E_V2SImode:
31989 case E_V4SFmode:
31990 case E_V2SFmode:
31991 gen = gen_neon_vrev64;
31992 break;
31993 default:
31994 return false;
31995 }
31996 break;
31997 default:
31998 return false;
31999 }
32000
32001 for (i = 0; i < nelt ; i += diff + 1)
32002 for (j = 0; j <= diff; j += 1)
32003 {
32004 /* This is guaranteed to be true as the value of diff
32005 is 7, 3, 1 and we should have enough elements in the
32006 queue to generate this. Getting a vector mask with a
32007 value of diff other than these values implies that
32008 something is wrong by the time we get here. */
32009 gcc_assert (i + j < nelt);
32010 if (d->perm[i + j] != i + diff - j)
32011 return false;
32012 }
32013
32014 /* Success! */
32015 if (d->testing_p)
32016 return true;
32017
32018 emit_insn (gen (d->vmode, d->target, d->op0));
32019 return true;
32020 }
32021
32022 /* Recognize patterns for the VTRN insns. */
32023
32024 static bool
32025 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32026 {
32027 unsigned int i, odd, mask, nelt = d->perm.length ();
32028 rtx out0, out1, in0, in1;
32029
32030 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32031 return false;
32032
32033 /* Note that these are little-endian tests. Adjust for big-endian later. */
32034 if (d->perm[0] == 0)
32035 odd = 0;
32036 else if (d->perm[0] == 1)
32037 odd = 1;
32038 else
32039 return false;
32040 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32041
32042 for (i = 0; i < nelt; i += 2)
32043 {
32044 if (d->perm[i] != i + odd)
32045 return false;
32046 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32047 return false;
32048 }
32049
32050 /* Success! */
32051 if (d->testing_p)
32052 return true;
32053
32054 in0 = d->op0;
32055 in1 = d->op1;
32056 if (BYTES_BIG_ENDIAN)
32057 {
32058 std::swap (in0, in1);
32059 odd = !odd;
32060 }
32061
32062 out0 = d->target;
32063 out1 = gen_reg_rtx (d->vmode);
32064 if (odd)
32065 std::swap (out0, out1);
32066
32067 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32068 return true;
32069 }
32070
32071 /* Recognize patterns for the VEXT insns. */
32072
32073 static bool
32074 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32075 {
32076 unsigned int i, nelt = d->perm.length ();
32077 rtx offset;
32078
32079 unsigned int location;
32080
32081 unsigned int next = d->perm[0] + 1;
32082
32083 /* TODO: Handle GCC's numbering of elements for big-endian. */
32084 if (BYTES_BIG_ENDIAN)
32085 return false;
32086
32087 /* Check if the extracted indexes are increasing by one. */
32088 for (i = 1; i < nelt; next++, i++)
32089 {
32090 /* If we hit the most significant element of the 2nd vector in
32091 the previous iteration, no need to test further. */
32092 if (next == 2 * nelt)
32093 return false;
32094
32095 /* If we are operating on only one vector: it could be a
32096 rotation. If there are only two elements of size < 64, let
32097 arm_evpc_neon_vrev catch it. */
32098 if (d->one_vector_p && (next == nelt))
32099 {
32100 if ((nelt == 2) && (d->vmode != V2DImode))
32101 return false;
32102 else
32103 next = 0;
32104 }
32105
32106 if (d->perm[i] != next)
32107 return false;
32108 }
32109
32110 location = d->perm[0];
32111
32112 /* Success! */
32113 if (d->testing_p)
32114 return true;
32115
32116 offset = GEN_INT (location);
32117
32118 if(d->vmode == E_DImode)
32119 return false;
32120
32121 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32122 return true;
32123 }
32124
32125 /* The NEON VTBL instruction is a fully variable permutation that's even
32126 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32127 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32128 can do slightly better by expanding this as a constant where we don't
32129 have to apply a mask. */
32130
32131 static bool
32132 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32133 {
32134 rtx rperm[MAX_VECT_LEN], sel;
32135 machine_mode vmode = d->vmode;
32136 unsigned int i, nelt = d->perm.length ();
32137
32138 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32139 numbering of elements for big-endian, we must reverse the order. */
32140 if (BYTES_BIG_ENDIAN)
32141 return false;
32142
32143 if (d->testing_p)
32144 return true;
32145
32146 /* Generic code will try constant permutation twice. Once with the
32147 original mode and again with the elements lowered to QImode.
32148 So wait and don't do the selector expansion ourselves. */
32149 if (vmode != V8QImode && vmode != V16QImode)
32150 return false;
32151
32152 for (i = 0; i < nelt; ++i)
32153 rperm[i] = GEN_INT (d->perm[i]);
32154 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32155 sel = force_reg (vmode, sel);
32156
32157 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32158 return true;
32159 }
32160
32161 static bool
32162 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32163 {
32164 /* Check if the input mask matches vext before reordering the
32165 operands. */
32166 if (TARGET_NEON)
32167 if (arm_evpc_neon_vext (d))
32168 return true;
32169
32170 /* The pattern matching functions above are written to look for a small
32171 number to begin the sequence (0, 1, N/2). If we begin with an index
32172 from the second operand, we can swap the operands. */
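  /* E.g. a V4SI selector {4,0,5,1} becomes {0,4,1,5} after rotating the
     inputs and swapping the operands, which arm_evpc_neon_vzip can then
     recognize.  */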
32173 unsigned int nelt = d->perm.length ();
32174 if (d->perm[0] >= nelt)
32175 {
32176 d->perm.rotate_inputs (1);
32177 std::swap (d->op0, d->op1);
32178 }
32179
32180 if (TARGET_NEON)
32181 {
32182 if (arm_evpc_neon_vuzp (d))
32183 return true;
32184 if (arm_evpc_neon_vzip (d))
32185 return true;
32186 if (arm_evpc_neon_vrev (d))
32187 return true;
32188 if (arm_evpc_neon_vtrn (d))
32189 return true;
32190 return arm_evpc_neon_vtbl (d);
32191 }
32192 return false;
32193 }
32194
32195 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32196
32197 static bool
32198 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32199 rtx target, rtx op0, rtx op1,
32200 const vec_perm_indices &sel)
32201 {
32202 if (vmode != op_mode)
32203 return false;
32204
32205 struct expand_vec_perm_d d;
32206 int i, nelt, which;
32207
32208 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32209 return false;
32210
32211 d.target = target;
32212 if (op0)
32213 {
32214 rtx nop0 = force_reg (vmode, op0);
32215 if (op0 == op1)
32216 op1 = nop0;
32217 op0 = nop0;
32218 }
32219 if (op1)
32220 op1 = force_reg (vmode, op1);
32221 d.op0 = op0;
32222 d.op1 = op1;
32223
32224 d.vmode = vmode;
32225 gcc_assert (VECTOR_MODE_P (d.vmode));
32226 d.testing_p = !target;
32227
32228 nelt = GET_MODE_NUNITS (d.vmode);
32229 for (i = which = 0; i < nelt; ++i)
32230 {
32231 int ei = sel[i] & (2 * nelt - 1);
32232 which |= (ei < nelt ? 1 : 2);
32233 }
32234
32235 switch (which)
32236 {
32237 default:
32238 gcc_unreachable();
32239
32240 case 3:
32241 d.one_vector_p = false;
32242 if (d.testing_p || !rtx_equal_p (op0, op1))
32243 break;
32244
32245 /* The elements of PERM do not suggest that only the first operand
32246 is used, but both operands are identical. Allow easier matching
32247 of the permutation by folding the permutation into the single
32248 input vector. */
32249 /* FALLTHRU */
32250 case 2:
32251 d.op0 = op1;
32252 d.one_vector_p = true;
32253 break;
32254
32255 case 1:
32256 d.op1 = op0;
32257 d.one_vector_p = true;
32258 break;
32259 }
32260
32261 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32262
32263 if (!d.testing_p)
32264 return arm_expand_vec_perm_const_1 (&d);
32265
32266 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32267 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32268 if (!d.one_vector_p)
32269 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32270
32271 start_sequence ();
32272 bool ret = arm_expand_vec_perm_const_1 (&d);
32273 end_sequence ();
32274
32275 return ret;
32276 }
32277
32278 bool
32279 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32280 {
32281 /* If we are soft float and either have ldrd or the mode is no wider
32282 than a word, then all auto increment forms are ok. */
32283 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32284 return true;
32285
32286 switch (code)
32287 {
32288 /* Post-increment and pre-decrement are supported for all
32289 instruction forms except for vector forms. */
32290 case ARM_POST_INC:
32291 case ARM_PRE_DEC:
32292 if (VECTOR_MODE_P (mode))
32293 {
32294 if (code != ARM_PRE_DEC)
32295 return true;
32296 else
32297 return false;
32298 }
32299
32300 return true;
32301
32302 case ARM_POST_DEC:
32303 case ARM_PRE_INC:
32304 /* Without LDRD and mode size greater than
32305 word size, there is no point in auto-incrementing
32306 because ldm and stm will not have these forms. */
32307 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32308 return false;
32309
32310 /* Vector and floating point modes do not support
32311 these auto increment forms. */
32312 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32313 return false;
32314
32315 return true;
32316
32317 default:
32318 return false;
32319
32320 }
32321
32322 return false;
32323 }
32324
32325 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32326 on ARM, since we know that shifts by negative amounts are no-ops.
32327 Additionally, the default expansion code is not available or suitable
32328 for post-reload insn splits (this can occur when the register allocator
32329 chooses not to do a shift in NEON).
32330
32331 This function is used in both initial expand and post-reload splits, and
32332 handles all kinds of 64-bit shifts.
32333
32334 Input requirements:
32335 - It is safe for the input and output to be the same register, but
32336 early-clobber rules apply for the shift amount and scratch registers.
32337 - Shift by register requires both scratch registers. In all other cases
32338 the scratch registers may be NULL.
32339 - Ashiftrt by a register also clobbers the CC register. */
32340 void
32341 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32342 rtx amount, rtx scratch1, rtx scratch2)
32343 {
32344 rtx out_high = gen_highpart (SImode, out);
32345 rtx out_low = gen_lowpart (SImode, out);
32346 rtx in_high = gen_highpart (SImode, in);
32347 rtx in_low = gen_lowpart (SImode, in);
32348
32349 /* Terminology:
32350 in = the register pair containing the input value.
32351 out = the destination register pair.
32352 up = the high- or low-part of each pair.
32353 down = the opposite part to "up".
32354 In a shift, we can consider bits to shift from "up"-stream to
32355 "down"-stream, so in a left-shift "up" is the low-part and "down"
32356 is the high-part of each register pair. */
32357
32358 rtx out_up = code == ASHIFT ? out_low : out_high;
32359 rtx out_down = code == ASHIFT ? out_high : out_low;
32360 rtx in_up = code == ASHIFT ? in_low : in_high;
32361 rtx in_down = code == ASHIFT ? in_high : in_low;
32362
32363 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32364 gcc_assert (out
32365 && (REG_P (out) || SUBREG_P (out))
32366 && GET_MODE (out) == DImode);
32367 gcc_assert (in
32368 && (REG_P (in) || SUBREG_P (in))
32369 && GET_MODE (in) == DImode);
32370 gcc_assert (amount
32371 && (((REG_P (amount) || SUBREG_P (amount))
32372 && GET_MODE (amount) == SImode)
32373 || CONST_INT_P (amount)));
32374 gcc_assert (scratch1 == NULL
32375 || (GET_CODE (scratch1) == SCRATCH)
32376 || (GET_MODE (scratch1) == SImode
32377 && REG_P (scratch1)));
32378 gcc_assert (scratch2 == NULL
32379 || (GET_CODE (scratch2) == SCRATCH)
32380 || (GET_MODE (scratch2) == SImode
32381 && REG_P (scratch2)));
32382 gcc_assert (!REG_P (out) || !REG_P (amount)
32383 || !HARD_REGISTER_P (out)
32384 || (REGNO (out) != REGNO (amount)
32385 && REGNO (out) + 1 != REGNO (amount)));
32386
32387 /* Macros to make following code more readable. */
32388 #define SUB_32(DEST,SRC) \
32389 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32390 #define RSB_32(DEST,SRC) \
32391 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32392 #define SUB_S_32(DEST,SRC) \
32393 gen_addsi3_compare0 ((DEST), (SRC), \
32394 GEN_INT (-32))
32395 #define SET(DEST,SRC) \
32396 gen_rtx_SET ((DEST), (SRC))
32397 #define SHIFT(CODE,SRC,AMOUNT) \
32398 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32399 #define LSHIFT(CODE,SRC,AMOUNT) \
32400 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32401 SImode, (SRC), (AMOUNT))
32402 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32403 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32404 SImode, (SRC), (AMOUNT))
32405 #define ORR(A,B) \
32406 gen_rtx_IOR (SImode, (A), (B))
32407 #define BRANCH(COND,LABEL) \
32408 gen_arm_cond_branch ((LABEL), \
32409 gen_rtx_ ## COND (CCmode, cc_reg, \
32410 const0_rtx), \
32411 cc_reg)
32412
32413 /* Shifts by register and shifts by constant are handled separately. */
32414 if (CONST_INT_P (amount))
32415 {
32416 /* We have a shift-by-constant. */
32417
32418 /* First, handle out-of-range shift amounts.
32419 In both cases we try to match the result that an ARM shift-by-register
32420 instruction would give. This helps reduce execution differences
32421 between optimization levels, but it won't stop other parts of the
32422 compiler doing different things. This is "undefined behavior" in
32423 any case. */
32424 if (INTVAL (amount) <= 0)
32425 emit_insn (gen_movdi (out, in));
32426 else if (INTVAL (amount) >= 64)
32427 {
32428 if (code == ASHIFTRT)
32429 {
32430 rtx const31_rtx = GEN_INT (31);
32431 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32432 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32433 }
32434 else
32435 emit_insn (gen_movdi (out, const0_rtx));
32436 }
32437
32438 /* Now handle valid shifts. */
32439 else if (INTVAL (amount) < 32)
32440 {
32441 /* Shifts by a constant less than 32. */
32442 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32443
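	  /* Illustrative example: a 64-bit LSHIFTRT by 10 emits, in effect,
	       out_low  = in_low >> 10;
	       out_low |= in_high << 22;
	       out_high = in_high >> 10;
	     plus the optional clearing insn below.  */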
32444 /* Clearing the out register in DImode first avoids lots
32445 of spilling and results in less stack usage.
32446 Later this redundant insn is completely removed.
32447 Do that only if "in" and "out" are different registers. */
32448 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32449 emit_insn (SET (out, const0_rtx));
32450 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32451 emit_insn (SET (out_down,
32452 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32453 out_down)));
32454 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32455 }
32456 else
32457 {
32458 /* Shifts by a constant greater than 31. */
32459 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32460
32461 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32462 emit_insn (SET (out, const0_rtx));
32463 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32464 if (code == ASHIFTRT)
32465 emit_insn (gen_ashrsi3 (out_up, in_up,
32466 GEN_INT (31)));
32467 else
32468 emit_insn (SET (out_up, const0_rtx));
32469 }
32470 }
32471 else
32472 {
32473 /* We have a shift-by-register. */
32474 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32475
32476 /* This alternative requires the scratch registers. */
32477 gcc_assert (scratch1 && REG_P (scratch1));
32478 gcc_assert (scratch2 && REG_P (scratch2));
32479
32480 /* We will need the values "amount-32" and "32-amount" later.
32481 Swapping them around now allows the later code to be more general. */
32482 switch (code)
32483 {
32484 case ASHIFT:
32485 emit_insn (SUB_32 (scratch1, amount));
32486 emit_insn (RSB_32 (scratch2, amount));
32487 break;
32488 case ASHIFTRT:
32489 emit_insn (RSB_32 (scratch1, amount));
32490 /* Also set CC = amount > 32. */
32491 emit_insn (SUB_S_32 (scratch2, amount));
32492 break;
32493 case LSHIFTRT:
32494 emit_insn (RSB_32 (scratch1, amount));
32495 emit_insn (SUB_32 (scratch2, amount));
32496 break;
32497 default:
32498 gcc_unreachable ();
32499 }
32500
32501 /* Emit code like this:
32502
32503 arithmetic-left:
32504 out_down = in_down << amount;
32505 out_down = (in_up << (amount - 32)) | out_down;
32506 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32507 out_up = in_up << amount;
32508
32509 arithmetic-right:
32510 out_down = in_down >> amount;
32511 out_down = (in_up << (32 - amount)) | out_down;
32512 if (amount >= 32)
32513 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32514 out_up = (signed)in_up >> amount;
32515
32516 logical-right:
32517 out_down = in_down >> amount;
32518 out_down = (in_up << (32 - amount)) | out_down;
32519 if (amount >= 32)
32520 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32521 out_up = (unsigned)in_up >> amount;
32522
32523 The ARM and Thumb2 variants are the same but implemented slightly
32524 differently. If this were only called during expand we could just
32525 use the Thumb2 case and let combine do the right thing, but this
32526 can also be called from post-reload splitters. */
32527
32528 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32529
32530 if (!TARGET_THUMB2)
32531 {
32532 /* Emit code for ARM mode. */
32533 emit_insn (SET (out_down,
32534 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32535 if (code == ASHIFTRT)
32536 {
32537 rtx_code_label *done_label = gen_label_rtx ();
32538 emit_jump_insn (BRANCH (LT, done_label));
32539 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32540 out_down)));
32541 emit_label (done_label);
32542 }
32543 else
32544 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32545 out_down)));
32546 }
32547 else
32548 {
32549 /* Emit code for Thumb2 mode.
32550 Thumb2 can't do shift and or in one insn. */
32551 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32552 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32553
32554 if (code == ASHIFTRT)
32555 {
32556 rtx_code_label *done_label = gen_label_rtx ();
32557 emit_jump_insn (BRANCH (LT, done_label));
32558 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32559 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32560 emit_label (done_label);
32561 }
32562 else
32563 {
32564 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32565 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32566 }
32567 }
32568
32569 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32570 }
32571
32572 #undef SUB_32
32573 #undef RSB_32
32574 #undef SUB_S_32
32575 #undef SET
32576 #undef SHIFT
32577 #undef LSHIFT
32578 #undef REV_LSHIFT
32579 #undef ORR
32580 #undef BRANCH
32581 }
32582
32583 /* Returns true if the pattern is a valid symbolic address, which is either a
32584 symbol_ref or (symbol_ref + addend).
32585
32586 According to the ARM ELF ABI, the initial addend of REL-type relocations
32587 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32588 literal field of the instruction as a 16-bit signed value in the range
32589 -32768 <= A < 32768.
32590
32591 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32592 unsigned range of 0 <= A < 256 as described in the AAELF32
32593 relocation handling documentation: REL-type relocations are encoded
32594 as unsigned in this case. */
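/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is accepted,
   whereas an addend of 0x10000 is rejected because it cannot be represented
   in the 16-bit signed addend described above.  */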
32595
32596 bool
32597 arm_valid_symbolic_address_p (rtx addr)
32598 {
32599 rtx xop0, xop1 = NULL_RTX;
32600 rtx tmp = addr;
32601
32602 if (target_word_relocations)
32603 return false;
32604
32605 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32606 return true;
32607
32608 /* (const (plus: symbol_ref const_int)) */
32609 if (GET_CODE (addr) == CONST)
32610 tmp = XEXP (addr, 0);
32611
32612 if (GET_CODE (tmp) == PLUS)
32613 {
32614 xop0 = XEXP (tmp, 0);
32615 xop1 = XEXP (tmp, 1);
32616
32617 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32618 {
32619 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32620 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32621 else
32622 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32623 }
32624 }
32625
32626 return false;
32627 }
32628
32629 /* Return true if *COMPARISON is a valid comparison operation, putting
32630 the operands into a form that is valid for it. */
32631 bool
32632 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32633 {
32634 enum rtx_code code = GET_CODE (*comparison);
32635 int code_int;
32636 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32637 ? GET_MODE (*op2) : GET_MODE (*op1);
32638
32639 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32640
32641 if (code == UNEQ || code == LTGT)
32642 return false;
32643
32644 code_int = (int)code;
32645 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32646 PUT_CODE (*comparison, (enum rtx_code)code_int);
32647
32648 switch (mode)
32649 {
32650 case E_SImode:
32651 if (!arm_add_operand (*op1, mode))
32652 *op1 = force_reg (mode, *op1);
32653 if (!arm_add_operand (*op2, mode))
32654 *op2 = force_reg (mode, *op2);
32655 return true;
32656
32657 case E_DImode:
32658 /* gen_compare_reg() will sort out any invalid operands. */
32659 return true;
32660
32661 case E_HFmode:
32662 if (!TARGET_VFP_FP16INST)
32663 break;
32664 /* FP16 comparisons are done in SF mode. */
32665 mode = SFmode;
32666 *op1 = convert_to_mode (mode, *op1, 1);
32667 *op2 = convert_to_mode (mode, *op2, 1);
32668 /* Fall through. */
32669 case E_SFmode:
32670 case E_DFmode:
32671 if (!vfp_compare_operand (*op1, mode))
32672 *op1 = force_reg (mode, *op1);
32673 if (!vfp_compare_operand (*op2, mode))
32674 *op2 = force_reg (mode, *op2);
32675 return true;
32676 default:
32677 break;
32678 }
32679
32680 return false;
32681
32682 }
32683
32684 /* Maximum number of instructions to set block of memory. */
32685 static int
32686 arm_block_set_max_insns (void)
32687 {
32688 if (optimize_function_for_size_p (cfun))
32689 return 4;
32690 else
32691 return current_tune->max_insns_inline_memset;
32692 }
32693
32694 /* Return TRUE if it's profitable to set a block of memory in the
32695 non-vectorized case. VAL is the value to set the memory with.
32696 LENGTH is the number of bytes to set. ALIGN is the alignment of
32697 the destination memory in bytes. UNALIGNED_P is TRUE if we can
32698 only set the memory with instructions that meet the alignment
32699 requirements. USE_STRD_P is TRUE if we can use strd to set the
32700 memory. */
32701 static bool
32702 arm_block_set_non_vect_profit_p (rtx val,
32703 unsigned HOST_WIDE_INT length,
32704 unsigned HOST_WIDE_INT align,
32705 bool unaligned_p, bool use_strd_p)
32706 {
32707 int num = 0;
32708 /* For a leftover of 0-7 bytes, this table gives the minimum number of
32709 strb/strh/str instructions needed to store it. */
32710 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
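  /* E.g. 3 leftover bytes take an strh plus an strb (2 insns), while 7
     take an str, an strh and an strb (3 insns).  */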
32711
32712 if (unaligned_p)
32713 {
32714 num = arm_const_inline_cost (SET, val);
32715 num += length / align + length % align;
32716 }
32717 else if (use_strd_p)
32718 {
32719 num = arm_const_double_inline_cost (val);
32720 num += (length >> 3) + leftover[length & 7];
32721 }
32722 else
32723 {
32724 num = arm_const_inline_cost (SET, val);
32725 num += (length >> 2) + leftover[length & 3];
32726 }
32727
32728 /* We may be able to combine last pair STRH/STRB into a single STR
32729 by shifting one byte back. */
32730 if (unaligned_access && length > 3 && (length & 3) == 3)
32731 num--;
32732
32733 return (num <= arm_block_set_max_insns ());
32734 }
32735
32736 /* Return TRUE if it's profitable to set block of memory for
32737 vectorized case. LENGTH is the number of bytes to set.
32738 ALIGN is the alignment of destination memory in bytes.
32739 MODE is the vector mode used to set the memory. */
32740 static bool
32741 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32742 unsigned HOST_WIDE_INT align,
32743 machine_mode mode)
32744 {
32745 int num;
32746 bool unaligned_p = ((align & 3) != 0);
32747 unsigned int nelt = GET_MODE_NUNITS (mode);
32748
32749 /* Instruction loading constant value. */
32750 num = 1;
32751 /* Instructions storing the memory. */
32752 num += (length + nelt - 1) / nelt;
32753 /* Instructions adjusting the address expression. We only need to
32754 adjust it when the destination is word-aligned and the leftover
32755 bytes can only be stored with a misaligned store instruction. */
32756 if (!unaligned_p && (length & 3) != 0)
32757 num++;
32758
32759 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32760 if (!unaligned_p && mode == V16QImode)
32761 num--;
32762
32763 return (num <= arm_block_set_max_insns ());
32764 }
32765
32766 /* Set a block of memory using vectorization instructions for the
32767 unaligned case. We fill the first LENGTH bytes of the memory
32768 area starting from DSTBASE with byte constant VALUE. ALIGN is
32769 the alignment requirement of memory. Return TRUE if succeeded. */
32770 static bool
32771 arm_block_set_unaligned_vect (rtx dstbase,
32772 unsigned HOST_WIDE_INT length,
32773 unsigned HOST_WIDE_INT value,
32774 unsigned HOST_WIDE_INT align)
32775 {
32776 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32777 rtx dst, mem;
32778 rtx val_vec, reg;
32779 rtx (*gen_func) (rtx, rtx);
32780 machine_mode mode;
32781 unsigned HOST_WIDE_INT v = value;
32782 unsigned int offset = 0;
32783 gcc_assert ((align & 0x3) != 0);
32784 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32785 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32786 if (length >= nelt_v16)
32787 {
32788 mode = V16QImode;
32789 gen_func = gen_movmisalignv16qi;
32790 }
32791 else
32792 {
32793 mode = V8QImode;
32794 gen_func = gen_movmisalignv8qi;
32795 }
32796 nelt_mode = GET_MODE_NUNITS (mode);
32797 gcc_assert (length >= nelt_mode);
32798 /* Skip if it isn't profitable. */
32799 if (!arm_block_set_vect_profit_p (length, align, mode))
32800 return false;
32801
32802 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32803 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32804
32805 v = sext_hwi (v, BITS_PER_WORD);
32806
32807 reg = gen_reg_rtx (mode);
32808 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32809 /* Emit instruction loading the constant value. */
32810 emit_move_insn (reg, val_vec);
32811
32812 /* Handle nelt_mode bytes in a vector. */
32813 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32814 {
32815 emit_insn ((*gen_func) (mem, reg));
32816 if (i + 2 * nelt_mode <= length)
32817 {
32818 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32819 offset += nelt_mode;
32820 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32821 }
32822 }
32823
32824 /* If at least nelt_v8 bytes are left over, we must be in
32825 V16QImode. */
32826 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32827
32828 /* Handle (8, 16) bytes leftover. */
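  /* E.g. with LENGTH == 28 in V16QImode the loop above stored bytes 0-15;
     we then advance DST by 12 and issue one more (overlapping) misaligned
     V16QI store covering bytes 12-27.  */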
32829 if (i + nelt_v8 < length)
32830 {
32831 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32832 offset += length - i;
32833 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32834
32835 /* We are shifting bytes back, set the alignment accordingly. */
32836 if ((length & 1) != 0 && align >= 2)
32837 set_mem_align (mem, BITS_PER_UNIT);
32838
32839 emit_insn (gen_movmisalignv16qi (mem, reg));
32840 }
32841 /* Handle (0, 8] bytes leftover. */
32842 else if (i < length && i + nelt_v8 >= length)
32843 {
32844 if (mode == V16QImode)
32845 reg = gen_lowpart (V8QImode, reg);
32846
32847 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32848 + (nelt_mode - nelt_v8))));
32849 offset += (length - i) + (nelt_mode - nelt_v8);
32850 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32851
32852 /* We are shifting bytes back, set the alignment accordingly. */
32853 if ((length & 1) != 0 && align >= 2)
32854 set_mem_align (mem, BITS_PER_UNIT);
32855
32856 emit_insn (gen_movmisalignv8qi (mem, reg));
32857 }
32858
32859 return true;
32860 }
32861
32862 /* Set a block of memory using vectorization instructions for the
32863 aligned case. We fill the first LENGTH bytes of the memory area
32864 starting from DSTBASE with byte constant VALUE. ALIGN is the
32865 alignment requirement of memory. Return TRUE if succeeded. */
32866 static bool
32867 arm_block_set_aligned_vect (rtx dstbase,
32868 unsigned HOST_WIDE_INT length,
32869 unsigned HOST_WIDE_INT value,
32870 unsigned HOST_WIDE_INT align)
32871 {
32872 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32873 rtx dst, addr, mem;
32874 rtx val_vec, reg;
32875 machine_mode mode;
32876 unsigned int offset = 0;
32877
32878 gcc_assert ((align & 0x3) == 0);
32879 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32880 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32881 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32882 mode = V16QImode;
32883 else
32884 mode = V8QImode;
32885
32886 nelt_mode = GET_MODE_NUNITS (mode);
32887 gcc_assert (length >= nelt_mode);
32888 /* Skip if it isn't profitable. */
32889 if (!arm_block_set_vect_profit_p (length, align, mode))
32890 return false;
32891
32892 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32893
32894 reg = gen_reg_rtx (mode);
32895 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32896 /* Emit instruction loading the constant value. */
32897 emit_move_insn (reg, val_vec);
32898
32899 i = 0;
32900 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32901 if (mode == V16QImode)
32902 {
32903 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32904 emit_insn (gen_movmisalignv16qi (mem, reg));
32905 i += nelt_mode;
32906 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32907 if (i + nelt_v8 < length && i + nelt_v16 > length)
32908 {
32909 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32910 offset += length - nelt_mode;
32911 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32912 /* We are shifting bytes back, set the alignment accordingly. */
32913 if ((length & 0x3) == 0)
32914 set_mem_align (mem, BITS_PER_UNIT * 4);
32915 else if ((length & 0x1) == 0)
32916 set_mem_align (mem, BITS_PER_UNIT * 2);
32917 else
32918 set_mem_align (mem, BITS_PER_UNIT);
32919
32920 emit_insn (gen_movmisalignv16qi (mem, reg));
32921 return true;
32922 }
32923 /* Fall through for bytes leftover. */
32924 mode = V8QImode;
32925 nelt_mode = GET_MODE_NUNITS (mode);
32926 reg = gen_lowpart (V8QImode, reg);
32927 }
32928
32929 /* Handle 8 bytes in a vector. */
32930 for (; (i + nelt_mode <= length); i += nelt_mode)
32931 {
32932 addr = plus_constant (Pmode, dst, i);
32933 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32934 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32935 emit_move_insn (mem, reg);
32936 else
32937 emit_insn (gen_unaligned_storev8qi (mem, reg));
32938 }
32939
32940 /* Handle single word leftover by shifting 4 bytes back. We can
32941 use aligned access for this case. */
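  /* E.g. with LENGTH == 12 in V8QImode, bytes 0-7 were stored above; we
     emit one more V8QI store at offset 4, overlapping bytes 4-7, instead
     of a separate word store.  */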
32942 if (i + UNITS_PER_WORD == length)
32943 {
32944 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32945 offset += i - UNITS_PER_WORD;
32946 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32947 /* We are shifting 4 bytes back, set the alignment accordingly. */
32948 if (align > UNITS_PER_WORD)
32949 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32950
32951 emit_insn (gen_unaligned_storev8qi (mem, reg));
32952 }
32953 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32954 We have to use unaligned access for this case. */
32955 else if (i < length)
32956 {
32957 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32958 offset += length - nelt_mode;
32959 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32960 /* We are shifting bytes back, set the alignment accordingly. */
32961 if ((length & 1) == 0)
32962 set_mem_align (mem, BITS_PER_UNIT * 2);
32963 else
32964 set_mem_align (mem, BITS_PER_UNIT);
32965
32966 emit_insn (gen_movmisalignv8qi (mem, reg));
32967 }
32968
32969 return true;
32970 }
32971
32972 /* Set a block of memory using plain strh/strb instructions, using only
32973 the instructions permitted by ALIGN on the processor. We fill the
32974 first LENGTH bytes of the memory area starting from DSTBASE
32975 with byte constant VALUE. ALIGN is the alignment requirement
32976 of memory. */
32977 static bool
32978 arm_block_set_unaligned_non_vect (rtx dstbase,
32979 unsigned HOST_WIDE_INT length,
32980 unsigned HOST_WIDE_INT value,
32981 unsigned HOST_WIDE_INT align)
32982 {
32983 unsigned int i;
32984 rtx dst, addr, mem;
32985 rtx val_exp, val_reg, reg;
32986 machine_mode mode;
32987 HOST_WIDE_INT v = value;
32988
32989 gcc_assert (align == 1 || align == 2);
32990
32991 if (align == 2)
32992 v |= (value << BITS_PER_UNIT);
32993
32994 v = sext_hwi (v, BITS_PER_WORD);
32995 val_exp = GEN_INT (v);
32996 /* Skip if it isn't profitable. */
32997 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32998 align, true, false))
32999 return false;
33000
33001 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33002 mode = (align == 2 ? HImode : QImode);
33003 val_reg = force_reg (SImode, val_exp);
33004 reg = gen_lowpart (mode, val_reg);
33005
33006 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33007 {
33008 addr = plus_constant (Pmode, dst, i);
33009 mem = adjust_automodify_address (dstbase, mode, addr, i);
33010 emit_move_insn (mem, reg);
33011 }
33012
33013 /* Handle single byte leftover. */
33014 if (i + 1 == length)
33015 {
33016 reg = gen_lowpart (QImode, val_reg);
33017 addr = plus_constant (Pmode, dst, i);
33018 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33019 emit_move_insn (mem, reg);
33020 i++;
33021 }
33022
33023 gcc_assert (i == length);
33024 return true;
33025 }
33026
33027 /* Set a block of memory using plain strd/str/strh/strb instructions,
33028 to permit unaligned stores on processors which support unaligned
33029 semantics for those instructions. We fill the first LENGTH bytes
33030 of the memory area starting from DSTBASE with byte constant VALUE.
33031 ALIGN is the alignment requirement of memory. */
33032 static bool
33033 arm_block_set_aligned_non_vect (rtx dstbase,
33034 unsigned HOST_WIDE_INT length,
33035 unsigned HOST_WIDE_INT value,
33036 unsigned HOST_WIDE_INT align)
33037 {
33038 unsigned int i;
33039 rtx dst, addr, mem;
33040 rtx val_exp, val_reg, reg;
33041 unsigned HOST_WIDE_INT v;
33042 bool use_strd_p;
33043
33044 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33045 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33046
33047 v = (value | (value << 8) | (value << 16) | (value << 24));
33048 if (length < UNITS_PER_WORD)
33049 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33050
33051 if (use_strd_p)
33052 v |= (v << BITS_PER_WORD);
33053 else
33054 v = sext_hwi (v, BITS_PER_WORD);
33055
33056 val_exp = GEN_INT (v);
33057 /* Skip if it isn't profitable. */
33058 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33059 align, false, use_strd_p))
33060 {
33061 if (!use_strd_p)
33062 return false;
33063
33064 /* Try without strd. */
33065 v = (v >> BITS_PER_WORD);
33066 v = sext_hwi (v, BITS_PER_WORD);
33067 val_exp = GEN_INT (v);
33068 use_strd_p = false;
33069 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33070 align, false, use_strd_p))
33071 return false;
33072 }
33073
33074 i = 0;
33075 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33076 /* Handle double words using strd if possible. */
33077 if (use_strd_p)
33078 {
33079 val_reg = force_reg (DImode, val_exp);
33080 reg = val_reg;
33081 for (; (i + 8 <= length); i += 8)
33082 {
33083 addr = plus_constant (Pmode, dst, i);
33084 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33085 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33086 emit_move_insn (mem, reg);
33087 else
33088 emit_insn (gen_unaligned_storedi (mem, reg));
33089 }
33090 }
33091 else
33092 val_reg = force_reg (SImode, val_exp);
33093
33094 /* Handle words. */
33095 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33096 for (; (i + 4 <= length); i += 4)
33097 {
33098 addr = plus_constant (Pmode, dst, i);
33099 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33100 if ((align & 3) == 0)
33101 emit_move_insn (mem, reg);
33102 else
33103 emit_insn (gen_unaligned_storesi (mem, reg));
33104 }
33105
33106 /* Merge last pair of STRH and STRB into a STR if possible. */
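  /* E.g. with LENGTH == 7 and a word-aligned destination, the loop above
     stored bytes 0-3; when unaligned access is available we emit a single
     str at offset 3 covering bytes 3-6 instead of an strh plus an strb.  */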
33107 if (unaligned_access && i > 0 && (i + 3) == length)
33108 {
33109 addr = plus_constant (Pmode, dst, i - 1);
33110 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33111 /* We are shifting one byte back, set the alignment accordingly. */
33112 if ((align & 1) == 0)
33113 set_mem_align (mem, BITS_PER_UNIT);
33114
33115 /* Most likely this is an unaligned access, and we can't tell at
33116 compilation time. */
33117 emit_insn (gen_unaligned_storesi (mem, reg));
33118 return true;
33119 }
33120
33121 /* Handle half word leftover. */
33122 if (i + 2 <= length)
33123 {
33124 reg = gen_lowpart (HImode, val_reg);
33125 addr = plus_constant (Pmode, dst, i);
33126 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33127 if ((align & 1) == 0)
33128 emit_move_insn (mem, reg);
33129 else
33130 emit_insn (gen_unaligned_storehi (mem, reg));
33131
33132 i += 2;
33133 }
33134
33135 /* Handle single byte leftover. */
33136 if (i + 1 == length)
33137 {
33138 reg = gen_lowpart (QImode, val_reg);
33139 addr = plus_constant (Pmode, dst, i);
33140 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33141 emit_move_insn (mem, reg);
33142 }
33143
33144 return true;
33145 }
33146
33147 /* Set a block of memory using vectorization instructions for both
33148 aligned and unaligned cases. We fill the first LENGTH bytes of
33149 the memory area starting from DSTBASE with byte constant VALUE.
33150 ALIGN is the alignment requirement of memory. */
33151 static bool
33152 arm_block_set_vect (rtx dstbase,
33153 unsigned HOST_WIDE_INT length,
33154 unsigned HOST_WIDE_INT value,
33155 unsigned HOST_WIDE_INT align)
33156 {
33157 /* Check whether we need to use unaligned store instruction. */
33158 if (((align & 3) != 0 || (length & 3) != 0)
33159 /* Check whether unaligned store instruction is available. */
33160 && (!unaligned_access || BYTES_BIG_ENDIAN))
33161 return false;
33162
33163 if ((align & 3) == 0)
33164 return arm_block_set_aligned_vect (dstbase, length, value, align);
33165 else
33166 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33167 }
33168
33169 /* Expand a string store (memset) operation. First we try to do it
33170 using vectorization instructions, then try ARM unaligned access and
33171 double-word stores if profitable. OPERANDS[0] is the destination,
33172 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33173 initialize the memory with, OPERANDS[3] is the known alignment of
33174 the destination. */
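/* For instance, a 16-byte, word-aligned memset is typically expanded, when
   the current tuning prefers NEON for string operations, into one vector
   constant load plus a single 16-byte store; otherwise it falls back to the
   scalar str/strd paths below.  */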
33175 bool
33176 arm_gen_setmem (rtx *operands)
33177 {
33178 rtx dstbase = operands[0];
33179 unsigned HOST_WIDE_INT length;
33180 unsigned HOST_WIDE_INT value;
33181 unsigned HOST_WIDE_INT align;
33182
33183 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33184 return false;
33185
33186 length = UINTVAL (operands[1]);
33187 if (length > 64)
33188 return false;
33189
33190 value = (UINTVAL (operands[2]) & 0xFF);
33191 align = UINTVAL (operands[3]);
33192 if (TARGET_NEON && length >= 8
33193 && current_tune->string_ops_prefer_neon
33194 && arm_block_set_vect (dstbase, length, value, align))
33195 return true;
33196
33197 if (!unaligned_access && (align & 3) != 0)
33198 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33199
33200 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33201 }
33202
33203
33204 static bool
33205 arm_macro_fusion_p (void)
33206 {
33207 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33208 }
33209
33210 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33211 for MOVW / MOVT macro fusion. */
33212
33213 static bool
33214 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33215 {
33216 /* We are trying to fuse
33217 movw imm / movt imm
33218 instructions as a group that gets scheduled together. */
33219
33220 rtx set_dest = SET_DEST (curr_set);
33221
33222 if (GET_MODE (set_dest) != SImode)
33223 return false;
33224
33225 /* We are trying to match:
33226 prev (movw) == (set (reg r0) (const_int imm16))
33227 curr (movt) == (set (zero_extract (reg r0)
33228 (const_int 16)
33229 (const_int 16))
33230 (const_int imm16_1))
33231 or
33232 prev (movw) == (set (reg r1)
33233 (high (symbol_ref ("SYM"))))
33234 curr (movt) == (set (reg r0)
33235 (lo_sum (reg r1)
33236 (symbol_ref ("SYM")))) */
33237
33238 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33239 {
33240 if (CONST_INT_P (SET_SRC (curr_set))
33241 && CONST_INT_P (SET_SRC (prev_set))
33242 && REG_P (XEXP (set_dest, 0))
33243 && REG_P (SET_DEST (prev_set))
33244 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33245 return true;
33246
33247 }
33248 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33249 && REG_P (SET_DEST (curr_set))
33250 && REG_P (SET_DEST (prev_set))
33251 && GET_CODE (SET_SRC (prev_set)) == HIGH
33252 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33253 return true;
33254
33255 return false;
33256 }
33257
33258 static bool
33259 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33260 {
33261 rtx prev_set = single_set (prev);
33262 rtx curr_set = single_set (curr);
33263
33264 if (!prev_set
33265 || !curr_set)
33266 return false;
33267
33268 if (any_condjump_p (curr))
33269 return false;
33270
33271 if (!arm_macro_fusion_p ())
33272 return false;
33273
33274 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33275 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33276 return true;
33277
33278 return false;
33279 }
33280
33281 /* Return true iff the instruction fusion described by OP is enabled. */
33282 bool
33283 arm_fusion_enabled_p (tune_params::fuse_ops op)
33284 {
33285 return current_tune->fusible_ops & op;
33286 }
33287
33288 /* Return TRUE if return address signing mechanism is enabled. */
33289 bool
33290 arm_current_function_pac_enabled_p (void)
33291 {
33292 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33293 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33294 && !crtl->is_leaf));
33295 }
33296
33297 /* Raise an error if the current target arch is not bti compatible. */
33298 void aarch_bti_arch_check (void)
33299 {
33300 if (!arm_arch8m_main)
33301 error ("This architecture does not support branch protection instructions");
33302 }
33303
33304 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33305 bool
33306 aarch_bti_enabled (void)
33307 {
33308 return aarch_enable_bti != 0;
33309 }
33310
33311 /* Check if INSN is a BTI J insn. */
33312 bool
33313 aarch_bti_j_insn_p (rtx_insn *insn)
33314 {
33315 if (!insn || !INSN_P (insn))
33316 return false;
33317
33318 rtx pat = PATTERN (insn);
33319 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33320 }
33321
33322 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33323 bool
33324 aarch_pac_insn_p (rtx x)
33325 {
33326 if (!x || !INSN_P (x))
33327 return false;
33328
33329 rtx pat = PATTERN (x);
33330
33331 if (GET_CODE (pat) == SET)
33332 {
33333 rtx tmp = XEXP (pat, 1);
33334 if (tmp
33335 && ((GET_CODE (tmp) == UNSPEC
33336 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33337 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33338 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33339 return true;
33340 }
33341
33342 return false;
33343 }
33344
33345 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33346 For Arm, both of these map to a simple BTI instruction. */
33347
33348 rtx
33349 aarch_gen_bti_c (void)
33350 {
33351 return gen_bti_nop ();
33352 }
33353
33354 rtx
33355 aarch_gen_bti_j (void)
33356 {
33357 return gen_bti_nop ();
33358 }
33359
33360 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33361 scheduled for speculative execution. Reject the long-running division
33362 and square-root instructions. */
33363
33364 static bool
33365 arm_sched_can_speculate_insn (rtx_insn *insn)
33366 {
33367 switch (get_attr_type (insn))
33368 {
33369 case TYPE_SDIV:
33370 case TYPE_UDIV:
33371 case TYPE_FDIVS:
33372 case TYPE_FDIVD:
33373 case TYPE_FSQRTS:
33374 case TYPE_FSQRTD:
33375 case TYPE_NEON_FP_SQRT_S:
33376 case TYPE_NEON_FP_SQRT_D:
33377 case TYPE_NEON_FP_SQRT_S_Q:
33378 case TYPE_NEON_FP_SQRT_D_Q:
33379 case TYPE_NEON_FP_DIV_S:
33380 case TYPE_NEON_FP_DIV_D:
33381 case TYPE_NEON_FP_DIV_S_Q:
33382 case TYPE_NEON_FP_DIV_D_Q:
33383 return false;
33384 default:
33385 return true;
33386 }
33387 }
33388
33389 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33390
33391 static unsigned HOST_WIDE_INT
33392 arm_asan_shadow_offset (void)
33393 {
33394 return HOST_WIDE_INT_1U << 29;
33395 }
33396
33397
33398 /* This is a temporary fix for PR60655. Ideally we need
33399 to handle most of these cases in the generic part but
33400 currently we reject minus (..) (sym_ref). We try to
33401 ameliorate the case with minus (sym_ref1) (sym_ref2)
33402 where they are in the same section. */
33403
33404 static bool
33405 arm_const_not_ok_for_debug_p (rtx p)
33406 {
33407 tree decl_op0 = NULL;
33408 tree decl_op1 = NULL;
33409
33410 if (GET_CODE (p) == UNSPEC)
33411 return true;
33412 if (GET_CODE (p) == MINUS)
33413 {
33414 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33415 {
33416 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33417 if (decl_op1
33418 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33419 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33420 {
33421 if ((VAR_P (decl_op1)
33422 || TREE_CODE (decl_op1) == CONST_DECL)
33423 && (VAR_P (decl_op0)
33424 || TREE_CODE (decl_op0) == CONST_DECL))
33425 return (get_variable_section (decl_op1, false)
33426 != get_variable_section (decl_op0, false));
33427
33428 if (TREE_CODE (decl_op1) == LABEL_DECL
33429 && TREE_CODE (decl_op0) == LABEL_DECL)
33430 return (DECL_CONTEXT (decl_op1)
33431 != DECL_CONTEXT (decl_op0));
33432 }
33433
33434 return true;
33435 }
33436 }
33437
33438 return false;
33439 }
33440
33441 /* Return TRUE if X is a reference to a value in a constant pool. */
33442 extern bool
33443 arm_is_constant_pool_ref (rtx x)
33444 {
33445 return (MEM_P (x)
33446 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33447 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33448 }
33449
33450 /* Remember the last target of arm_set_current_function. */
33451 static GTY(()) tree arm_previous_fndecl;
33452
33453 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33454
33455 void
33456 save_restore_target_globals (tree new_tree)
33457 {
33458 /* If we have a previous state, use it. */
33459 if (TREE_TARGET_GLOBALS (new_tree))
33460 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33461 else if (new_tree == target_option_default_node)
33462 restore_target_globals (&default_target_globals);
33463 else
33464 {
33465 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33466 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33467 }
33468
33469 arm_option_params_internal ();
33470 }
33471
33472 /* Invalidate arm_previous_fndecl. */
33473
33474 void
33475 arm_reset_previous_fndecl (void)
33476 {
33477 arm_previous_fndecl = NULL_TREE;
33478 }
33479
33480 /* Establish appropriate back-end context for processing the function
33481 FNDECL. The argument might be NULL to indicate processing at top
33482 level, outside of any function scope. */
33483
33484 static void
33485 arm_set_current_function (tree fndecl)
33486 {
33487 if (!fndecl || fndecl == arm_previous_fndecl)
33488 return;
33489
33490 tree old_tree = (arm_previous_fndecl
33491 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33492 : NULL_TREE);
33493
33494 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33495
33496 /* If current function has no attributes but previous one did,
33497 use the default node. */
33498 if (! new_tree && old_tree)
33499 new_tree = target_option_default_node;
33500
33501 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC
33502 pop to the default have been handled by save_restore_target_globals
33503 from arm_pragma_target_parse. */
33504 if (old_tree == new_tree)
33505 return;
33506
33507 arm_previous_fndecl = fndecl;
33508
33509 /* First set the target options. */
33510 cl_target_option_restore (&global_options, &global_options_set,
33511 TREE_TARGET_OPTION (new_tree));
33512
33513 save_restore_target_globals (new_tree);
33514
33515 arm_override_options_after_change_1 (&global_options, &global_options_set);
33516 }
33517
33518 /* Implement TARGET_OPTION_PRINT. */
33519
33520 static void
33521 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33522 {
33523 int flags = ptr->x_target_flags;
33524 const char *fpu_name;
33525
33526 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33527 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33528
33529 fprintf (file, "%*sselected isa %s\n", indent, "",
33530 TARGET_THUMB2_P (flags) ? "thumb2" :
33531 TARGET_THUMB_P (flags) ? "thumb1" :
33532 "arm");
33533
33534 if (ptr->x_arm_arch_string)
33535 fprintf (file, "%*sselected architecture %s\n", indent, "",
33536 ptr->x_arm_arch_string);
33537
33538 if (ptr->x_arm_cpu_string)
33539 fprintf (file, "%*sselected CPU %s\n", indent, "",
33540 ptr->x_arm_cpu_string);
33541
33542 if (ptr->x_arm_tune_string)
33543 fprintf (file, "%*sselected tune %s\n", indent, "",
33544 ptr->x_arm_tune_string);
33545
33546 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33547 }
33548
33549 /* Hook to determine if one function can safely inline another. */
33550
33551 static bool
33552 arm_can_inline_p (tree caller, tree callee)
33553 {
33554 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33555 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33556 bool can_inline = true;
33557
33558 struct cl_target_option *caller_opts
33559 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33560 : target_option_default_node);
33561
33562 struct cl_target_option *callee_opts
33563 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33564 : target_option_default_node);
33565
33566 if (callee_opts == caller_opts)
33567 return true;
33568
33569 /* Callee's ISA features should be a subset of the caller's. */
33570 struct arm_build_target caller_target;
33571 struct arm_build_target callee_target;
33572 caller_target.isa = sbitmap_alloc (isa_num_bits);
33573 callee_target.isa = sbitmap_alloc (isa_num_bits);
33574
33575 arm_configure_build_target (&caller_target, caller_opts, false);
33576 arm_configure_build_target (&callee_target, callee_opts, false);
33577 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33578 can_inline = false;
33579
33580 sbitmap_free (caller_target.isa);
33581 sbitmap_free (callee_target.isa);
33582
33583 /* OK to inline between different modes.
33584 A function with mode-specific instructions, e.g. using asm,
33585 must be explicitly protected with noinline. */
33586 return can_inline;
33587 }
33588
33589 /* Hook to fix function's alignment affected by target attribute. */
33590
33591 static void
33592 arm_relayout_function (tree fndecl)
33593 {
33594 if (DECL_USER_ALIGN (fndecl))
33595 return;
33596
33597 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33598
33599 if (!callee_tree)
33600 callee_tree = target_option_default_node;
33601
33602 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33603 SET_DECL_ALIGN
33604 (fndecl,
33605 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33606 }
33607
33608 /* Inner function to process the attribute((target(...))): take an
33609 argument and set the current options from it. If the argument is a
33610 list, recursively go over the list. */
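/* E.g. __attribute__((target("thumb,fpu=neon-vfpv4"))) is handled one
   comma-separated token at a time by the strtok loop below.  */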
33611
33612 static bool
33613 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33614 {
33615 if (TREE_CODE (args) == TREE_LIST)
33616 {
33617 bool ret = true;
33618
33619 for (; args; args = TREE_CHAIN (args))
33620 if (TREE_VALUE (args)
33621 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33622 ret = false;
33623 return ret;
33624 }
33625
33626 else if (TREE_CODE (args) != STRING_CST)
33627 {
33628 error ("attribute %<target%> argument not a string");
33629 return false;
33630 }
33631
33632 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33633 char *q;
33634
33635 while ((q = strtok (argstr, ",")) != NULL)
33636 {
33637 argstr = NULL;
33638 if (!strcmp (q, "thumb"))
33639 {
33640 opts->x_target_flags |= MASK_THUMB;
33641 if (TARGET_FDPIC && !arm_arch_thumb2)
33642 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33643 }
33644
33645 else if (!strcmp (q, "arm"))
33646 opts->x_target_flags &= ~MASK_THUMB;
33647
33648 else if (!strcmp (q, "general-regs-only"))
33649 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33650
33651 else if (startswith (q, "fpu="))
33652 {
33653 int fpu_index;
33654 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33655 &fpu_index, CL_TARGET))
33656 {
33657 error ("invalid fpu for target attribute or pragma %qs", q);
33658 return false;
33659 }
33660 if (fpu_index == TARGET_FPU_auto)
33661 {
33662 /* This doesn't really make sense until we support
33663 general dynamic selection of the architecture and all
33664 sub-features. */
33665 sorry ("auto fpu selection not currently permitted here");
33666 return false;
33667 }
33668 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33669 }
33670 else if (startswith (q, "arch="))
33671 {
33672 char *arch = q + 5;
33673 const arch_option *arm_selected_arch
33674 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33675
33676 if (!arm_selected_arch)
33677 {
33678 error ("invalid architecture for target attribute or pragma %qs",
33679 q);
33680 return false;
33681 }
33682
33683 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33684 }
33685 else if (q[0] == '+')
33686 {
33687 opts->x_arm_arch_string
33688 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33689 }
33690 else
33691 {
33692 error ("unknown target attribute or pragma %qs", q);
33693 return false;
33694 }
33695 }
33696
33697 return true;
33698 }
33699
33700 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33701
33702 tree
33703 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33704 struct gcc_options *opts_set)
33705 {
33706 struct cl_target_option cl_opts;
33707
33708 if (!arm_valid_target_attribute_rec (args, opts))
33709 return NULL_TREE;
33710
33711 cl_target_option_save (&cl_opts, opts, opts_set);
33712 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33713 arm_option_check_internal (opts);
33714 /* Do any overrides, such as global options arch=xxx.
33715 We do this since arm_active_target was overridden. */
33716 arm_option_reconfigure_globals ();
33717 arm_options_perform_arch_sanity_checks ();
33718 arm_option_override_internal (opts, opts_set);
33719
33720 return build_target_option_node (opts, opts_set);
33721 }
33722
33723 static void
33724 add_attribute (const char * mode, tree *attributes)
33725 {
33726 size_t len = strlen (mode);
33727 tree value = build_string (len, mode);
33728
33729 TREE_TYPE (value) = build_array_type (char_type_node,
33730 build_index_type (size_int (len)));
33731
33732 *attributes = tree_cons (get_identifier ("target"),
33733 build_tree_list (NULL_TREE, value),
33734 *attributes);
33735 }
33736
33737 /* For testing. Alternately insert thumb and arm modes on functions. */
33738
33739 static void
33740 arm_insert_attributes (tree fndecl, tree * attributes)
33741 {
33742 const char *mode;
33743
33744 if (! TARGET_FLIP_THUMB)
33745 return;
33746
33747 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33748 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33749 return;
33750
33751 /* Nested definitions must inherit mode. */
33752 if (current_function_decl)
33753 {
33754 mode = TARGET_THUMB ? "thumb" : "arm";
33755 add_attribute (mode, attributes);
33756 return;
33757 }
33758
33759 /* If there is already a setting don't change it. */
33760 if (lookup_attribute ("target", *attributes) != NULL)
33761 return;
33762
33763 mode = thumb_flipper ? "thumb" : "arm";
33764 add_attribute (mode, attributes);
33765
33766 thumb_flipper = !thumb_flipper;
33767 }
33768
33769 /* Hook to validate attribute((target("string"))). */
33770
33771 static bool
33772 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33773 tree args, int ARG_UNUSED (flags))
33774 {
33775 bool ret = true;
33776 struct gcc_options func_options, func_options_set;
33777 tree cur_tree, new_optimize;
33778 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33779
33780 /* Get the optimization options of the current function. */
33781 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33782
33783 /* If the function changed the optimization levels as well as setting target
33784 options, start with the optimizations specified. */
33785 if (!func_optimize)
33786 func_optimize = optimization_default_node;
33787
33788 /* Init func_options. */
33789 memset (&func_options, 0, sizeof (func_options));
33790 init_options_struct (&func_options, NULL);
33791 lang_hooks.init_options_struct (&func_options);
33792 memset (&func_options_set, 0, sizeof (func_options_set));
33793
33794 /* Initialize func_options to the defaults. */
33795 cl_optimization_restore (&func_options, &func_options_set,
33796 TREE_OPTIMIZATION (func_optimize));
33797
33798 cl_target_option_restore (&func_options, &func_options_set,
33799 TREE_TARGET_OPTION (target_option_default_node));
33800
33801 /* Set func_options flags with new target mode. */
33802 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33803 &func_options_set);
33804
33805 if (cur_tree == NULL_TREE)
33806 ret = false;
33807
33808 new_optimize = build_optimization_node (&func_options, &func_options_set);
33809
33810 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33811
33812 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33813
33814 return ret;
33815 }
33816
33817 /* Match an ISA feature bitmap to a named FPU. We always use the
33818 first entry that exactly matches the feature set, so that we
33819 effectively canonicalize the FPU name for the assembler. */
33820 static const char*
33821 arm_identify_fpu_from_isa (sbitmap isa)
33822 {
33823 auto_sbitmap fpubits (isa_num_bits);
33824 auto_sbitmap cand_fpubits (isa_num_bits);
33825
33826 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33827
33828 /* If there are no ISA feature bits relating to the FPU, we must be
33829 doing soft-float. */
33830 if (bitmap_empty_p (fpubits))
33831 return "softvfp";
33832
33833 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33834 {
33835 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33836 if (bitmap_equal_p (fpubits, cand_fpubits))
33837 return all_fpus[i].name;
33838 }
33839 /* We must find an entry, or things have gone wrong. */
33840 gcc_unreachable ();
33841 }
33842
33843 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33844 by the function fndecl. */
33845 void
33846 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33847 {
33848 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33849
33850 struct cl_target_option *targ_options;
33851 if (target_parts)
33852 targ_options = TREE_TARGET_OPTION (target_parts);
33853 else
33854 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33855 gcc_assert (targ_options);
33856
33857 arm_print_asm_arch_directives (stream, targ_options);
33858
33859 fprintf (stream, "\t.syntax unified\n");
33860
33861 if (TARGET_THUMB)
33862 {
33863 if (is_called_in_ARM_mode (decl)
33864 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33865 && cfun->is_thunk))
33866 fprintf (stream, "\t.code 32\n");
33867 else if (TARGET_THUMB1)
33868 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33869 else
33870 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33871 }
33872 else
33873 fprintf (stream, "\t.arm\n");
33874
33875 if (TARGET_POKE_FUNCTION_NAME)
33876 arm_poke_function_name (stream, (const char *) name);
33877 }
33878
33879 /* If MEM is in the form of [base+offset], extract the two parts
33880 of address and set to BASE and OFFSET, otherwise return false
33881 after clearing BASE and OFFSET. */
33882
33883 static bool
33884 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33885 {
33886 rtx addr;
33887
33888 gcc_assert (MEM_P (mem));
33889
33890 addr = XEXP (mem, 0);
33891
33892 /* Strip off const from addresses like (const (addr)). */
33893 if (GET_CODE (addr) == CONST)
33894 addr = XEXP (addr, 0);
33895
33896 if (REG_P (addr))
33897 {
33898 *base = addr;
33899 *offset = const0_rtx;
33900 return true;
33901 }
33902
33903 if (GET_CODE (addr) == PLUS
33904 && GET_CODE (XEXP (addr, 0)) == REG
33905 && CONST_INT_P (XEXP (addr, 1)))
33906 {
33907 *base = XEXP (addr, 0);
33908 *offset = XEXP (addr, 1);
33909 return true;
33910 }
33911
33912 *base = NULL_RTX;
33913 *offset = NULL_RTX;
33914
33915 return false;
33916 }
33917
33918 /* If INSN is a load or store whose address has the form [base+offset],
33919 extract the two parts into BASE and OFFSET. IS_LOAD is set to TRUE
33920 if it's a load. Return TRUE if INSN is such an instruction,
33921 otherwise return FALSE. */
33922
33923 static bool
33924 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33925 {
33926 rtx x, dest, src;
33927
33928 gcc_assert (INSN_P (insn));
33929 x = PATTERN (insn);
33930 if (GET_CODE (x) != SET)
33931 return false;
33932
33933 src = SET_SRC (x);
33934 dest = SET_DEST (x);
33935 if (REG_P (src) && MEM_P (dest))
33936 {
33937 *is_load = false;
33938 extract_base_offset_in_addr (dest, base, offset);
33939 }
33940 else if (MEM_P (src) && REG_P (dest))
33941 {
33942 *is_load = true;
33943 extract_base_offset_in_addr (src, base, offset);
33944 }
33945 else
33946 return false;
33947
33948 return (*base != NULL_RTX && *offset != NULL_RTX);
33949 }
33950
33951 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33952
33953 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33954 and PRI are only calculated for these instructions.  For other instructions,
33955 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
33956 instruction fusion can be supported by returning different priorities.
33957
33958 It's important that irrelevant instructions get the largest FUSION_PRI. */
33959
33960 static void
33961 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33962 int *fusion_pri, int *pri)
33963 {
33964 int tmp, off_val;
33965 bool is_load;
33966 rtx base, offset;
33967
33968 gcc_assert (INSN_P (insn));
33969
33970 tmp = max_pri - 1;
33971 if (!fusion_load_store (insn, &base, &offset, &is_load))
33972 {
33973 *pri = tmp;
33974 *fusion_pri = tmp;
33975 return;
33976 }
33977
33978 /* Load goes first. */
33979 if (is_load)
33980 *fusion_pri = tmp - 1;
33981 else
33982 *fusion_pri = tmp - 2;
33983
33984 tmp /= 2;
33985
33986 /* INSN with smaller base register goes first. */
33987 tmp -= ((REGNO (base) & 0xff) << 20);
33988
33989 /* INSN with smaller offset goes first. */
33990 off_val = (int)(INTVAL (offset));
33991 if (off_val >= 0)
33992 tmp -= (off_val & 0xfffff);
33993 else
33994 tmp += ((- off_val) & 0xfffff);
33995
33996 *pri = tmp;
33997 return;
33998 }
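
/* Worked example (a sketch): two loads ldr r0, [r1, #4] and ldr r2, [r1, #8]
   both get FUSION_PRI == MAX_PRI - 2, while their PRI values differ only in
   the offset term, so the scheduler keeps the pair together and places the
   smaller offset first.  */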
33999
34000
34001 /* Construct and return a PARALLEL RTX vector with elements numbering the
34002 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34003 the vector - from the perspective of the architecture. This does not
34004 line up with GCC's perspective on lane numbers, so we end up with
34005 different masks depending on our target endian-ness. The diagram
34006 below may help. We must draw the distinction when building masks
34007 which select one half of the vector.  An instruction selecting
34008 architectural low-lanes for a big-endian target must be described using
34009 a mask selecting GCC high-lanes.
34010
34011 Big-Endian Little-Endian
34012
34013 GCC 0 1 2 3 3 2 1 0
34014 | x | x | x | x | | x | x | x | x |
34015 Architecture 3 2 1 0 3 2 1 0
34016
34017 Low Mask: { 2, 3 } { 0, 1 }
34018 High Mask: { 0, 1 } { 2, 3 }
34019 */
34020
34021 rtx
34022 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34023 {
34024 int nunits = GET_MODE_NUNITS (mode);
34025 rtvec v = rtvec_alloc (nunits / 2);
34026 int high_base = nunits / 2;
34027 int low_base = 0;
34028 int base;
34029 rtx t1;
34030 int i;
34031
34032 if (BYTES_BIG_ENDIAN)
34033 base = high ? low_base : high_base;
34034 else
34035 base = high ? high_base : low_base;
34036
34037 for (i = 0; i < nunits / 2; i++)
34038 RTVEC_ELT (v, i) = GEN_INT (base + i);
34039
34040 t1 = gen_rtx_PARALLEL (mode, v);
34041 return t1;
34042 }
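
/* For example (illustrative), for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   High Mask row in the diagram above.  */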
34043
34044 /* Check OP for validity as a PARALLEL RTX vector with elements
34045 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34046 from the perspective of the architecture. See the diagram above
34047 arm_simd_vect_par_cnst_half for more details. */
34048
34049 bool
34050 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34051 bool high)
34052 {
34053 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34054 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34055 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34056 int i = 0;
34057
34058 if (!VECTOR_MODE_P (mode))
34059 return false;
34060
34061 if (count_op != count_ideal)
34062 return false;
34063
34064 for (i = 0; i < count_ideal; i++)
34065 {
34066 rtx elt_op = XVECEXP (op, 0, i);
34067 rtx elt_ideal = XVECEXP (ideal, 0, i);
34068
34069 if (!CONST_INT_P (elt_op)
34070 || INTVAL (elt_ideal) != INTVAL (elt_op))
34071 return false;
34072 }
34073 return true;
34074 }
34075
34076 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34077 in Thumb1. */
34078 static bool
34079 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34080 const_tree)
34081 {
34082 /* For now, we punt and do not handle this for TARGET_THUMB1. */
34083 if (vcall_offset && TARGET_THUMB1)
34084 return false;
34085
34086 /* Otherwise ok. */
34087 return true;
34088 }
34089
34090 /* Generate RTL for a conditional branch with rtx comparison CODE in
34091 mode CC_MODE. The destination of the unlikely conditional branch
34092 is LABEL_REF. */
34093
34094 void
34095 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34096 rtx label_ref)
34097 {
34098 rtx x;
34099 x = gen_rtx_fmt_ee (code, VOIDmode,
34100 gen_rtx_REG (cc_mode, CC_REGNUM),
34101 const0_rtx);
34102
34103 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34104 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34105 pc_rtx);
34106 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34107 }
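
/* The jump emitted above has the usual conditional-branch shape (sketch):
     (set (pc) (if_then_else (CODE (reg:CC_MODE CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))
   and goes through emit_unlikely_jump, so the branch is marked as unlikely
   to be taken.  */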
34108
34109 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34110
34111 For pure-code sections there is no letter code for this attribute, so
34112 output all the section flags numerically when this is needed. */
34113
34114 static bool
34115 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34116 {
34117
34118 if (flags & SECTION_ARM_PURECODE)
34119 {
34120 *num = 0x20000000;
34121
34122 if (!(flags & SECTION_DEBUG))
34123 *num |= 0x2;
34124 if (flags & SECTION_EXCLUDE)
34125 *num |= 0x80000000;
34126 if (flags & SECTION_WRITE)
34127 *num |= 0x1;
34128 if (flags & SECTION_CODE)
34129 *num |= 0x4;
34130 if (flags & SECTION_MERGE)
34131 *num |= 0x10;
34132 if (flags & SECTION_STRINGS)
34133 *num |= 0x20;
34134 if (flags & SECTION_TLS)
34135 *num |= 0x400;
34136 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34137 *num |= 0x200;
34138
34139 return true;
34140 }
34141
34142 return false;
34143 }
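
/* For instance (a sketch using the bit values above), a pure-code executable
   section with SECTION_CODE set and SECTION_DEBUG clear is given the numeric
   flags 0x20000000 | 0x4 | 0x2 = 0x20000006, which is emitted in place of the
   usual letter string.  */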
34144
34145 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34146
34147 If pure-code is passed as an option, make sure all functions are in
34148 sections that have the SHF_ARM_PURECODE attribute. */
34149
34150 static section *
34151 arm_function_section (tree decl, enum node_frequency freq,
34152 bool startup, bool exit)
34153 {
34154 const char * section_name;
34155 section * sec;
34156
34157 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34158 return default_function_section (decl, freq, startup, exit);
34159
34160 if (!target_pure_code)
34161 return default_function_section (decl, freq, startup, exit);
34162
34163
34164 section_name = DECL_SECTION_NAME (decl);
34165
34166 /* If a function is not in a named section then it falls under the 'default'
34167 text section, also known as '.text'. We can preserve previous behavior as
34168 the default text section already has the SHF_ARM_PURECODE section
34169 attribute. */
34170 if (!section_name)
34171 {
34172 section *default_sec = default_function_section (decl, freq, startup,
34173 exit);
34174
34175 /* If default_sec is not null, then it must be a special section like for
34176 example .text.startup. We set the pure-code attribute and return the
34177 same section to preserve existing behavior. */
34178 if (default_sec)
34179 default_sec->common.flags |= SECTION_ARM_PURECODE;
34180 return default_sec;
34181 }
34182
34183 /* Otherwise look whether a section has already been created with
34184 'section_name'. */
34185 sec = get_named_section (decl, section_name, 0);
34186 if (!sec)
34187 /* If that is not the case, passing NULL as the section's name to
34188 'get_named_section' will create a section with the declaration's
34189 section name. */
34190 sec = get_named_section (decl, NULL, 0);
34191
34192 /* Set the SHF_ARM_PURECODE attribute. */
34193 sec->common.flags |= SECTION_ARM_PURECODE;
34194
34195 return sec;
34196 }
34197
34198 /* Implements the TARGET_SECTION_FLAGS hook.
34199
34200 If DECL is a function declaration and pure-code is passed as an option
34201 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34202 section's name and RELOC indicates whether the declaration's initializer may
34203 contain runtime relocations. */
34204
34205 static unsigned int
34206 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34207 {
34208 unsigned int flags = default_section_type_flags (decl, name, reloc);
34209
34210 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34211 flags |= SECTION_ARM_PURECODE;
34212
34213 return flags;
34214 }
34215
34216 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34217
34218 static void
34219 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34220 rtx op0, rtx op1,
34221 rtx *quot_p, rtx *rem_p)
34222 {
34223 if (mode == SImode)
34224 gcc_assert (!TARGET_IDIV);
34225
34226 scalar_int_mode libval_mode
34227 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34228
34229 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34230 libval_mode, op0, mode, op1, mode);
34231
34232 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34233 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34234 GET_MODE_SIZE (mode));
34235
34236 gcc_assert (quotient);
34237 gcc_assert (remainder);
34238
34239 *quot_p = quotient;
34240 *rem_p = remainder;
34241 }
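
/* Illustrative sketch for SImode: the call (e.g. to __aeabi_idivmod) is
   modelled as returning a single DImode value; the subreg at byte offset 0
   extracts the quotient and the subreg at offset GET_MODE_SIZE (SImode) the
   remainder, matching the AEABI convention of returning the pair in
   consecutive registers.  */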
34242
34243 /* This function checks for the availability of the coprocessor builtin passed
34244 in BUILTIN for the current target. Returns true if it is available and
34245 false otherwise.  If a BUILTIN is passed for which this function has not
34246 been implemented, it will abort via gcc_unreachable. */
34247
34248 bool
34249 arm_coproc_builtin_available (enum unspecv builtin)
34250 {
34251 /* None of these builtins are available in Thumb mode if the target only
34252 supports Thumb-1. */
34253 if (TARGET_THUMB1)
34254 return false;
34255
34256 switch (builtin)
34257 {
34258 case VUNSPEC_CDP:
34259 case VUNSPEC_LDC:
34260 case VUNSPEC_LDCL:
34261 case VUNSPEC_STC:
34262 case VUNSPEC_STCL:
34263 case VUNSPEC_MCR:
34264 case VUNSPEC_MRC:
34265 if (arm_arch4)
34266 return true;
34267 break;
34268 case VUNSPEC_CDP2:
34269 case VUNSPEC_LDC2:
34270 case VUNSPEC_LDC2L:
34271 case VUNSPEC_STC2:
34272 case VUNSPEC_STC2L:
34273 case VUNSPEC_MCR2:
34274 case VUNSPEC_MRC2:
34275 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34276 ARMv8-{A,M}. */
34277 if (arm_arch5t)
34278 return true;
34279 break;
34280 case VUNSPEC_MCRR:
34281 case VUNSPEC_MRRC:
34282 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34283 ARMv8-{A,M}. */
34284 if (arm_arch6 || arm_arch5te)
34285 return true;
34286 break;
34287 case VUNSPEC_MCRR2:
34288 case VUNSPEC_MRRC2:
34289 if (arm_arch6)
34290 return true;
34291 break;
34292 default:
34293 gcc_unreachable ();
34294 }
34295 return false;
34296 }
34297
34298 /* This function returns true if OP is a valid memory operand for the ldc and
34299 stc coprocessor instructions and false otherwise. */
34300
34301 bool
34302 arm_coproc_ldc_stc_legitimate_address (rtx op)
34303 {
34304 HOST_WIDE_INT range;
34305 /* Has to be a memory operand. */
34306 if (!MEM_P (op))
34307 return false;
34308
34309 op = XEXP (op, 0);
34310
34311 /* We accept registers. */
34312 if (REG_P (op))
34313 return true;
34314
34315 switch (GET_CODE (op))
34316 {
34317 case PLUS:
34318 {
34319 /* Or registers with an offset. */
34320 if (!REG_P (XEXP (op, 0)))
34321 return false;
34322
34323 op = XEXP (op, 1);
34324
34325 /* The offset must be an immediate though. */
34326 if (!CONST_INT_P (op))
34327 return false;
34328
34329 range = INTVAL (op);
34330
34331 /* Within the range of [-1020,1020]. */
34332 if (!IN_RANGE (range, -1020, 1020))
34333 return false;
34334
34335 /* And a multiple of 4. */
34336 return (range % 4) == 0;
34337 }
34338 case PRE_INC:
34339 case POST_INC:
34340 case PRE_DEC:
34341 case POST_DEC:
34342 return REG_P (XEXP (op, 0));
34343 default:
34344 gcc_unreachable ();
34345 }
34346 return false;
34347 }
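
/* A few accepted and rejected forms, for illustration: [r3], [r3, #8] and
   [r3, #-1020] are valid, as are pre/post increment and decrement of a plain
   register; [r3, #2] is rejected because the offset is not a multiple of 4,
   and [r3, #1024] because it is outside [-1020, 1020].  */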
34348
34349 /* Return the diagnostic message string if conversion from FROMTYPE to
34350 TOTYPE is not allowed, NULL otherwise. */
34351
34352 static const char *
34353 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34354 {
34355 if (element_mode (fromtype) != element_mode (totype))
34356 {
34357 /* Do not allow conversions to/from BFmode scalar types. */
34358 if (TYPE_MODE (fromtype) == BFmode)
34359 return N_("invalid conversion from type %<bfloat16_t%>");
34360 if (TYPE_MODE (totype) == BFmode)
34361 return N_("invalid conversion to type %<bfloat16_t%>");
34362 }
34363
34364 /* Conversion allowed. */
34365 return NULL;
34366 }
34367
34368 /* Return the diagnostic message string if the unary operation OP is
34369 not permitted on TYPE, NULL otherwise. */
34370
34371 static const char *
34372 arm_invalid_unary_op (int op, const_tree type)
34373 {
34374 /* Reject all single-operand operations on BFmode except for &. */
34375 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34376 return N_("operation not permitted on type %<bfloat16_t%>");
34377
34378 /* Operation allowed. */
34379 return NULL;
34380 }
34381
34382 /* Return the diagnostic message string if the binary operation OP is
34383 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34384
34385 static const char *
34386 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34387 const_tree type2)
34388 {
34389 /* Reject all 2-operand operations on BFmode. */
34390 if (element_mode (type1) == BFmode
34391 || element_mode (type2) == BFmode)
34392 return N_("operation not permitted on type %<bfloat16_t%>");
34393
34394 /* Operation allowed. */
34395 return NULL;
34396 }
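
/* Illustrative examples of source rejected by the three hooks above,
   assuming bfloat16_t operands:
     bfloat16_t x, y;
     float f = x;    // arm_invalid_conversion
     x = -y;         // arm_invalid_unary_op
     x = x + y;      // arm_invalid_binary_op
   Taking the address, &x, remains valid via the ADDR_EXPR exception.  */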
34397
34398 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34399
34400 In VFPv1, VFP registers could only be accessed in the mode they were
34401 set, so subregs would be invalid there. However, we don't support
34402 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34403
34404 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34405 VFP registers in little-endian order. We can't describe that accurately to
34406 GCC, so avoid taking subregs of such values.
34407
34408 The only exception is going from a 128-bit to a 64-bit type. In that
34409 case the data layout happens to be consistent for big-endian, so we
34410 explicitly allow that case. */
34411
34412 static bool
34413 arm_can_change_mode_class (machine_mode from, machine_mode to,
34414 reg_class_t rclass)
34415 {
34416 if (TARGET_BIG_END
34417 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34418 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34419 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34420 && reg_classes_intersect_p (VFP_REGS, rclass))
34421 return false;
34422 return true;
34423 }
34424
34425 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34426 strcpy from constants will be faster. */
34427
34428 static HOST_WIDE_INT
34429 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34430 {
34431 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34432 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34433 return MAX (align, BITS_PER_WORD * factor);
34434 return align;
34435 }
34436
34437 /* Emit a speculation barrier on target architectures that do not have
34438 DSB/ISB directly. Such systems probably don't need a barrier
34439 themselves, but if the code is ever run on a later architecture, it
34440 might become a problem. */
34441 void
34442 arm_emit_speculation_barrier_function ()
34443 {
34444 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34445 }
34446
34447 /* Have we recorded an explicit access to the Q bit of APSR?  */
34448 bool
34449 arm_q_bit_access (void)
34450 {
34451 if (cfun && cfun->decl)
34452 return lookup_attribute ("acle qbit",
34453 DECL_ATTRIBUTES (cfun->decl));
34454 return true;
34455 }
34456
34457 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34458 bool
34459 arm_ge_bits_access (void)
34460 {
34461 if (cfun && cfun->decl)
34462 return lookup_attribute ("acle gebits",
34463 DECL_ATTRIBUTES (cfun->decl));
34464 return true;
34465 }
34466
34467 /* Return NULL if the insn INSN is valid within a low-overhead loop.
34468 Otherwise return a string describing why doloop cannot be applied. */
34469
34470 static const char *
34471 arm_invalid_within_doloop (const rtx_insn *insn)
34472 {
34473 if (!TARGET_HAVE_LOB)
34474 return default_invalid_within_doloop (insn);
34475
34476 if (CALL_P (insn))
34477 return "Function call in the loop.";
34478
34479 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34480 return "LR is used inside loop.";
34481
34482 return NULL;
34483 }
34484
34485 bool
34486 arm_target_insn_ok_for_lob (rtx insn)
34487 {
34488 basic_block bb = BLOCK_FOR_INSN (insn);
34489 /* Make sure the basic block of the target insn is a simple latch
34490 whose single predecessor and single successor are both the loop
34491 body itself.  Only simple loops with a single basic block as the body
34492 are supported for low-overhead loops, which guarantees that the LE
34493 target is above LE itself in the generated code. */
34494
34495 return single_succ_p (bb)
34496 && single_pred_p (bb)
34497 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34498 && contains_no_active_insn_p (bb);
34499 }
34500
34501 #if CHECKING_P
34502 namespace selftest {
34503
34504 /* Scan the static data tables generated by parsecpu.awk looking for
34505 potential issues with the data. We primarily check for
34506 inconsistencies in the option extensions at present (extensions
34507 that duplicate others but aren't marked as aliases). Furthermore,
34508 for correct canonicalization, later options must never be a subset
34509 of an earlier option.  Any extension should also only specify other
34510 feature bits and never an architecture bit. The architecture is inferred
34511 from the declaration of the extension. */
34512 static void
34513 arm_test_cpu_arch_data (void)
34514 {
34515 const arch_option *arch;
34516 const cpu_option *cpu;
34517 auto_sbitmap target_isa (isa_num_bits);
34518 auto_sbitmap isa1 (isa_num_bits);
34519 auto_sbitmap isa2 (isa_num_bits);
34520
34521 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34522 {
34523 const cpu_arch_extension *ext1, *ext2;
34524
34525 if (arch->common.extensions == NULL)
34526 continue;
34527
34528 arm_initialize_isa (target_isa, arch->common.isa_bits);
34529
34530 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34531 {
34532 if (ext1->alias)
34533 continue;
34534
34535 arm_initialize_isa (isa1, ext1->isa_bits);
34536 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34537 {
34538 if (ext2->alias || ext1->remove != ext2->remove)
34539 continue;
34540
34541 arm_initialize_isa (isa2, ext2->isa_bits);
34542 /* If the option is a subset of the parent option, it doesn't
34543 add anything and so isn't useful. */
34544 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34545
34546 /* If the extension specifies any architectural bits then
34547 disallow it. Extensions should only specify feature bits. */
34548 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34549 }
34550 }
34551 }
34552
34553 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34554 {
34555 const cpu_arch_extension *ext1, *ext2;
34556
34557 if (cpu->common.extensions == NULL)
34558 continue;
34559
34560 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34561
34562 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34563 {
34564 if (ext1->alias)
34565 continue;
34566
34567 arm_initialize_isa (isa1, ext1->isa_bits);
34568 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34569 {
34570 if (ext2->alias || ext1->remove != ext2->remove)
34571 continue;
34572
34573 arm_initialize_isa (isa2, ext2->isa_bits);
34574 /* If the option is a subset of the parent option, it doesn't
34575 add anything and so isn't useful. */
34576 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34577
34578 /* If the extension specifies any architectural bits then
34579 disallow it. Extensions should only specify feature bits. */
34580 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34581 }
34582 }
34583 }
34584 }
34585
34586 /* Scan the static data tables generated by parsecpu.awk looking for
34587 potential issues with the data.  Here we check the consistency of the
34588 FPU bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34589 a feature bit that is not defined by any FPU flag. */
34590 static void
34591 arm_test_fpu_data (void)
34592 {
34593 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34594 auto_sbitmap fpubits (isa_num_bits);
34595 auto_sbitmap tmpset (isa_num_bits);
34596
34597 static const enum isa_feature fpu_bitlist_internal[]
34598 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34599 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34600
34601 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34602 {
34603 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34604 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34605 bitmap_clear (isa_all_fpubits_internal);
34606 bitmap_copy (isa_all_fpubits_internal, tmpset);
34607 }
34608
34609 if (!bitmap_empty_p (isa_all_fpubits_internal))
34610 {
34611 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34612 " group that are not defined by any FPU.\n"
34613 " Check your arm-cpus.in.\n");
34614 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34615 }
34616 }
34617
34618 static void
34619 arm_run_selftests (void)
34620 {
34621 arm_test_cpu_arch_data ();
34622 arm_test_fpu_data ();
34623 }
34624 } /* Namespace selftest. */
34625
34626 #undef TARGET_RUN_TARGET_SELFTESTS
34627 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34628 #endif /* CHECKING_P */
34629
34630 /* Implement TARGET_STACK_PROTECT_GUARD.  If the guard is based on a
34631 global variable, use the default implementation; otherwise return
34632 NULL_TREE. */
34633 static tree
34634 arm_stack_protect_guard (void)
34635 {
34636 if (arm_stack_protector_guard == SSP_GLOBAL)
34637 return default_stack_protect_guard ();
34638
34639 return NULL_TREE;
34640 }
34641
34642 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34643 Unlike the arm version, we do NOT implement asm flag outputs. */
34644
34645 rtx_insn *
34646 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34647 vec<machine_mode> & /*input_modes*/,
34648 vec<const char *> &constraints,
34649 vec<rtx> &, vec<rtx> & /*clobbers*/,
34650 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34651 {
34652 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34653 if (startswith (constraints[i], "=@cc"))
34654 {
34655 sorry ("%<asm%> flags not supported in thumb1 mode");
34656 break;
34657 }
34658 return NULL;
34659 }
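
/* For illustration, an asm with a flag output such as
     asm ("cmp %1, %2" : "=@cceq" (r) : "r" (a), "r" (b));
   is handled by the Arm/Thumb-2 md_asm_adjust hook but hits the sorry ()
   above when compiling for Thumb-1.  */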
34660
34661 /* Generate code to enable conditional branches in functions over 1 MiB.
34662 Parameters are:
34663 operands: is the operands list of the asm insn (see arm_cond_branch or
34664 arm_cond_branch_reversed).
34665 pos_label: is an index into the operands array where operands[pos_label] is
34666 the asm label of the final jump destination.
34667 dest: is a string which is used to generate the asm label of the intermediate
34668 destination.
34669 branch_format: is a string denoting the intermediate branch format, e.g.
34670 "beq", "bne", etc. */
34671
34672 const char *
34673 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34674 const char * branch_format)
34675 {
34676 rtx_code_label * tmp_label = gen_label_rtx ();
34677 char label_buf[256];
34678 char buffer[128];
34679 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
34680 CODE_LABEL_NUMBER (tmp_label));
34681 const char *label_ptr = arm_strip_name_encoding (label_buf);
34682 rtx dest_label = operands[pos_label];
34683 operands[pos_label] = tmp_label;
34684
34685 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
34686 output_asm_insn (buffer, operands);
34687
34688 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34689 operands[pos_label] = dest_label;
34690 output_asm_insn (buffer, operands);
34691 return "";
34692 }
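
/* Sketch of the emitted sequence (label name purely illustrative): the short
   conditional branch built from BRANCH_FORMAT targets a fresh local label,
   and an unconditional b covers the long range, e.g.

	beq	.LCB0
	b	<final destination>
.LCB0:

   so the conditional part stays within its limited branch range.  */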
34693
34694 /* Return the base register class for addresses in the given MODE; for some
34695 MVE vector modes the base register must be in LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34696 enum reg_class
34697 arm_mode_base_reg_class (machine_mode mode)
34698 {
34699 if (TARGET_HAVE_MVE
34700 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34701 return LO_REGS;
34702
34703 return MODE_BASE_REG_REG_CLASS (mode);
34704 }
34705
34706 struct gcc_target targetm = TARGET_INITIALIZER;
34707
34708 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34709
34710 opt_machine_mode
34711 arm_get_mask_mode (machine_mode mode)
34712 {
34713 if (TARGET_HAVE_MVE)
34714 return arm_mode_to_pred_mode (mode);
34715
34716 return default_get_mask_mode (mode);
34717 }
34718
34719 /* Output assembly to read the thread pointer from the appropriate TPIDR
34720 register into DST.  If PRED_P is true, also emit the %? that is used to
34721 output the predication code. */
34722
34723 const char *
34724 arm_output_load_tpidr (rtx dst, bool pred_p)
34725 {
34726 char buf[64];
34727 int tpidr_coproc_num = -1;
34728 switch (target_thread_pointer)
34729 {
34730 case TP_TPIDRURW:
34731 tpidr_coproc_num = 2;
34732 break;
34733 case TP_TPIDRURO:
34734 tpidr_coproc_num = 3;
34735 break;
34736 case TP_TPIDRPRW:
34737 tpidr_coproc_num = 4;
34738 break;
34739 default:
34740 gcc_unreachable ();
34741 }
34742 snprintf (buf, sizeof (buf),
34743 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34744 pred_p ? "%?" : "", tpidr_coproc_num);
34745 output_asm_insn (buf, &dst);
34746 return "";
34747 }
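
/* For example (illustrative), when the thread pointer lives in TPIDRURO, DST
   is r0 and PRED_P is false, the function above emits

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   with the final operand selected by target_thread_pointer as shown.  */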
34748
34749 #include "gt-arm.h"