1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
38 #include "stringpool.h"
45 #include "diagnostic-core.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
52 #include "insn-attr.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
72 #include "gimple-iterator.h"
74 #include "tree-vectorizer.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
79 /* This file should be included last. */
80 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook set by the language front end (if any) to emit language-specific
   object attributes; NULL when the front end has nothing to emit.  */
void (*arm_lang_output_object_attributes_hook)(void);
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx
);
95 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets
*arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
101 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap
);
104 static int arm_address_register_rtx_p (rtx
, int);
105 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
106 static bool is_called_in_ARM_mode (tree
);
107 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
108 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
109 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
110 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
111 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
112 inline static int thumb1_index_register_rtx_p (rtx
, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx
, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx
, int);
118 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
119 static bool arm_print_operand_punct_valid_p (unsigned char code
);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
121 static arm_cc
get_arm_condition_code (rtx
);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx
*, const char *, const char *,
125 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
126 static struct machine_function
*arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
129 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
130 static Mnode
*add_minipool_forward_ref (Mfix
*);
131 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
132 static Mnode
*add_minipool_backward_ref (Mfix
*);
133 static void assign_minipool_offsets (Mfix
*);
134 static void arm_print_value (FILE *, rtx
);
135 static void dump_minipool (rtx_insn
*);
136 static int arm_barrier_cost (rtx_insn
*);
137 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
138 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
139 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree
);
146 static unsigned long arm_compute_func_type (void);
147 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
148 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
149 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
153 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
154 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree
, const_tree
);
158 static void arm_set_default_type_attributes (tree
);
159 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code
,
162 unsigned HOST_WIDE_INT val
,
163 struct four_ints
*return_sequence
);
164 static int optimal_immediate_sequence_1 (enum rtx_code code
,
165 unsigned HOST_WIDE_INT val
,
166 struct four_ints
*return_sequence
,
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree
, tree
);
170 static machine_mode
arm_promote_function_mode (const_tree
,
173 static bool arm_return_in_memory (const_tree
, const_tree
);
174 static rtx
arm_function_value (const_tree
, const_tree
, bool);
175 static rtx
arm_libcall_value_1 (machine_mode
);
176 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
183 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
184 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn
*, bool);
186 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
187 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
188 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
189 static void emit_constant_insn (rtx cond
, rtx pattern
);
190 static rtx_insn
*emit_set_insn (rtx
, rtx
);
191 static void arm_add_cfa_adjust_cfa_note (rtx
, int, rtx
, rtx
);
192 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx
);
196 static int arm_arg_partial_bytes (cumulative_args_t
,
197 const function_arg_info
&);
198 static rtx
arm_function_arg (cumulative_args_t
, const function_arg_info
&);
199 static void arm_function_arg_advance (cumulative_args_t
,
200 const function_arg_info
&);
201 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
202 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
203 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
205 static rtx
aapcs_libcall_value (machine_mode
);
206 static int aapcs_select_return_coproc (const_tree
, const_tree
);
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
210 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
213 static void arm_encode_section_info (tree
, rtx
, int);
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree
, tree
*);
220 static void arm_setup_incoming_varargs (cumulative_args_t
,
221 const function_arg_info
&, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t
,
223 const function_arg_info
&);
224 static bool arm_promote_prototypes (const_tree
);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree
);
228 static bool arm_must_pass_in_stack (const function_arg_info
&);
229 static bool arm_return_in_memory (const_tree
, const_tree
);
231 static void arm_unwind_emit (FILE *, rtx_insn
*);
232 static bool arm_output_ttype (rtx
);
233 static void arm_asm_emit_except_personality (rtx
);
235 static void arm_asm_init_sections (void);
236 static rtx
arm_dwarf_register_span (rtx
);
238 static tree
arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree
arm_get_cookie_size (tree
);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree
);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree
arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree
, rtx
);
250 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options
*, struct gcc_options
*,
253 struct cl_target_option
*);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option
*);
256 static void arm_set_current_function (tree
);
257 static bool arm_can_inline_p (tree
, tree
);
258 static void arm_relayout_function (tree
);
259 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
260 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
261 static bool arm_sched_can_speculate_insn (rtx_insn
*);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn
*);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn
*, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
269 static bool arm_output_addr_const_extra (FILE *, rtx
);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree
);
272 static tree
arm_promoted_type (const_tree t
);
273 static bool arm_scalar_mode_supported_p (scalar_mode
);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx
, tree
, rtx
);
278 static rtx
arm_trampoline_adjust_address (rtx
);
279 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
283 static bool arm_array_mode_supported_p (machine_mode
,
284 unsigned HOST_WIDE_INT
);
285 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
286 static bool arm_class_likely_spilled_p (reg_class_t
);
287 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
288 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
295 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes
*, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
302 static bool arm_vectorize_vec_perm_const (machine_mode
, machine_mode
, rtx
, rtx
,
303 rtx
, const vec_perm_indices
&);
305 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
309 int misalign ATTRIBUTE_UNUSED
);
311 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
312 bool op0_preserve_value
);
313 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
315 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
318 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
320 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
322 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
323 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
326 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
327 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
328 static rtx_insn
*thumb1_md_asm_adjust (vec
<rtx
> &, vec
<rtx
> &,
330 vec
<const char *> &, vec
<rtx
> &,
331 HARD_REG_SET
&, location_t
);
332 static const char *arm_identify_fpu_from_isa (sbitmap
);
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes
[] =
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
342 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
343 /* Whereas these functions are always known to reside within the 26 bit
345 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute
, NULL
},
357 /* ARM/PE has three new attributes:
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
366 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
367 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute
, NULL
},
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute
, NULL
},
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry
, NULL
},
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call
, NULL
},
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL
, NULL
}
386 static const scoped_attribute_specs arm_gnu_attribute_table
=
388 "gnu", arm_gnu_attributes
391 static const scoped_attribute_specs
*const arm_attribute_table
[] =
393 &arm_gnu_attribute_table
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
491 #undef TARGET_ENCODE_SECTION_INFO
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
652 #endif /* ARM_UNWIND_INFO */
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
/* Overrides of the default target hooks: each #undef/#define pair replaces
   the generic default with the ARM-specific implementation that is picked
   up by the TARGET_* hook table in target-def.h (included above).  */
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack
;
852 static char * minipool_startobj
;
854 /* The maximum number of insns skipped which
855 will be conditionalised if possible. */
856 static int max_insns_skipped
= 5;
858 /* True if we are currently building a constant table. */
859 int making_const_table
;
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
864 /* The current tuning set. */
865 const struct tune_params
*current_tune
;
867 /* Which floating point hardware to schedule for. */
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label
[14];
872 static int thumb_call_reg_needed
;
874 /* The bits in this mask specify which instruction scheduling options should
876 unsigned int tune_flags
= 0;
878 /* The highest ARM architecture version supported by the
880 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
882 /* Active target architecture and tuning. */
884 struct arm_build_target arm_active_target
;
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
904 /* Nonzero if this chip supports the ARM 6K extensions. */
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
910 /* Nonzero if instructions present in ARMv6-M can be used. */
913 /* Nonzero if this chip supports the ARM 7 extensions. */
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae
= 0;
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm
= 0;
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
925 /* Nonzero if instructions present in ARMv8 can be used. */
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
942 int arm_arch8m_main
= 0;
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
946 int arm_arch8_1m_main
= 0;
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
950 int arm_fp16_inst
= 0;
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched
= 0;
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm
= 0;
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt
= 0;
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2
= 0;
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale
= 0;
967 /* Nonzero if tuning for XScale */
968 int arm_tune_xscale
= 0;
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf
= 0;
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9
= 0;
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork
= 0;
984 /* Nonzero if chip supports Thumb 1. */
987 /* Nonzero if chip supports Thumb 2. */
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv
;
992 int arm_arch_thumb_hwdiv
;
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce
;
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool
= false;
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register
= INVALID_REGNUM
;
1003 enum arm_pcs arm_pcs_default
;
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state
;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc
;
1010 rtx arm_target_insn
;
1011 int arm_target_label
;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count
= 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask
= 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen
= 0;
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc
= 0;
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod
= 0;
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse
= 0;
1029 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1030 int arm_m_profile_small_mul
= 0;
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm
= 0;
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16
= 0;
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde
= 0;
1040 int arm_arch_cde_coproc
= 0;
1041 const int arm_arch_cde_coproc_bits
[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes
[] =
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence
[] =
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names
[NB_FP_SYSREGS
] = {
1062 #undef DEF_FP_SYSREG
/* Assembler mnemonic emitted for a logical shift left.  */
1064 #define ARM_LSL_NAME "lsl"
/* Nonzero iff the two NUL-terminated strings compare equal.  */
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1075 /* Initialization code. */
1079 enum processor_type scheduler
;
1080 unsigned int tune_flags
;
1081 const struct tune_params
*tune
;
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1092 /* arm generic vectorizer costs. */
1094 struct cpu_vec_costs arm_default_vec_cost
= {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1110 #include "aarch-cost-tables.h"
1114 const struct cpu_cost_table cortexa9_extra_costs
=
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1133 true /* non_exec_costs_exec. */
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1151 COSTS_N_INSNS (4), /* extend_add. */
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1221 const struct cpu_cost_table cortexa8_extra_costs
=
1227 COSTS_N_INSNS (1), /* shift. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1234 0, /* extend_arith. */
1240 true /* non_exec_costs_exec. */
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1258 COSTS_N_INSNS (2), /* extend_add. */
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1328 const struct cpu_cost_table cortexa5_extra_costs
=
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1347 true /* non_exec_costs_exec. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1437 const struct cpu_cost_table cortexa7_extra_costs
=
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1456 true /* non_exec_costs_exec. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1545 const struct cpu_cost_table cortexa12_extra_costs
=
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1564 true /* non_exec_costs_exec. */
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1582 COSTS_N_INSNS (3), /* extend_add. */
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1652 const struct cpu_cost_table cortexa15_extra_costs
=
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1671 true /* non_exec_costs_exec. */
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1689 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1759 const struct cpu_cost_table v7m_extra_costs
=
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1796 COSTS_N_INSNS (3), /* extend_add. */
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1866 const struct addr_mode_cost_table generic_addr_mode_costs
=
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1888 const struct tune_params arm_slowmul_tune
=
1890 &generic_extra_costs
, /* Insn extra costs. */
1891 &generic_addr_mode_costs
, /* Addressing mode costs. */
1892 NULL
, /* Sched adj cost. */
1893 arm_default_branch_cost
,
1894 &arm_default_vec_cost
,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL
,
1900 tune_params::PREF_CONST_POOL_TRUE
,
1901 tune_params::PREF_LDRD_FALSE
,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER
,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1906 tune_params::FUSE_NOTHING
,
1907 tune_params::SCHED_AUTOPREF_OFF
1910 const struct tune_params arm_fastmul_tune
=
1912 &generic_extra_costs
, /* Insn extra costs. */
1913 &generic_addr_mode_costs
, /* Addressing mode costs. */
1914 NULL
, /* Sched adj cost. */
1915 arm_default_branch_cost
,
1916 &arm_default_vec_cost
,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL
,
1922 tune_params::PREF_CONST_POOL_TRUE
,
1923 tune_params::PREF_LDRD_FALSE
,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER
,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1928 tune_params::FUSE_NOTHING
,
1929 tune_params::SCHED_AUTOPREF_OFF
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1935 const struct tune_params arm_strongarm_tune
=
1937 &generic_extra_costs
, /* Insn extra costs. */
1938 &generic_addr_mode_costs
, /* Addressing mode costs. */
1939 NULL
, /* Sched adj cost. */
1940 arm_default_branch_cost
,
1941 &arm_default_vec_cost
,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL
,
1947 tune_params::PREF_CONST_POOL_TRUE
,
1948 tune_params::PREF_LDRD_FALSE
,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER
,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1953 tune_params::FUSE_NOTHING
,
1954 tune_params::SCHED_AUTOPREF_OFF
1957 const struct tune_params arm_xscale_tune
=
1959 &generic_extra_costs
, /* Insn extra costs. */
1960 &generic_addr_mode_costs
, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost
,
1962 arm_default_branch_cost
,
1963 &arm_default_vec_cost
,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL
,
1969 tune_params::PREF_CONST_POOL_TRUE
,
1970 tune_params::PREF_LDRD_FALSE
,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER
,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1975 tune_params::FUSE_NOTHING
,
1976 tune_params::SCHED_AUTOPREF_OFF
1979 const struct tune_params arm_9e_tune
=
1981 &generic_extra_costs
, /* Insn extra costs. */
1982 &generic_addr_mode_costs
, /* Addressing mode costs. */
1983 NULL
, /* Sched adj cost. */
1984 arm_default_branch_cost
,
1985 &arm_default_vec_cost
,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL
,
1991 tune_params::PREF_CONST_POOL_TRUE
,
1992 tune_params::PREF_LDRD_FALSE
,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER
,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1997 tune_params::FUSE_NOTHING
,
1998 tune_params::SCHED_AUTOPREF_OFF
2001 const struct tune_params arm_marvell_pj4_tune
=
2003 &generic_extra_costs
, /* Insn extra costs. */
2004 &generic_addr_mode_costs
, /* Addressing mode costs. */
2005 NULL
, /* Sched adj cost. */
2006 arm_default_branch_cost
,
2007 &arm_default_vec_cost
,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL
,
2013 tune_params::PREF_CONST_POOL_TRUE
,
2014 tune_params::PREF_LDRD_FALSE
,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER
,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2019 tune_params::FUSE_NOTHING
,
2020 tune_params::SCHED_AUTOPREF_OFF
2023 const struct tune_params arm_v6t2_tune
=
2025 &generic_extra_costs
, /* Insn extra costs. */
2026 &generic_addr_mode_costs
, /* Addressing mode costs. */
2027 NULL
, /* Sched adj cost. */
2028 arm_default_branch_cost
,
2029 &arm_default_vec_cost
,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL
,
2035 tune_params::PREF_CONST_POOL_FALSE
,
2036 tune_params::PREF_LDRD_FALSE
,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER
,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2041 tune_params::FUSE_NOTHING
,
2042 tune_params::SCHED_AUTOPREF_OFF
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune
=
2049 &generic_extra_costs
,
2050 &generic_addr_mode_costs
, /* Addressing mode costs. */
2051 NULL
, /* Sched adj cost. */
2052 arm_default_branch_cost
,
2053 &arm_default_vec_cost
,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL
,
2059 tune_params::PREF_CONST_POOL_FALSE
,
2060 tune_params::PREF_LDRD_FALSE
,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER
,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2065 tune_params::FUSE_NOTHING
,
2066 tune_params::SCHED_AUTOPREF_OFF
2069 const struct tune_params arm_cortex_a8_tune
=
2071 &cortexa8_extra_costs
,
2072 &generic_addr_mode_costs
, /* Addressing mode costs. */
2073 NULL
, /* Sched adj cost. */
2074 arm_default_branch_cost
,
2075 &arm_default_vec_cost
,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL
,
2081 tune_params::PREF_CONST_POOL_FALSE
,
2082 tune_params::PREF_LDRD_FALSE
,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER
,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2087 tune_params::FUSE_NOTHING
,
2088 tune_params::SCHED_AUTOPREF_OFF
2091 const struct tune_params arm_cortex_a7_tune
=
2093 &cortexa7_extra_costs
,
2094 &generic_addr_mode_costs
, /* Addressing mode costs. */
2095 NULL
, /* Sched adj cost. */
2096 arm_default_branch_cost
,
2097 &arm_default_vec_cost
,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL
,
2103 tune_params::PREF_CONST_POOL_FALSE
,
2104 tune_params::PREF_LDRD_FALSE
,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER
,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2109 tune_params::FUSE_NOTHING
,
2110 tune_params::SCHED_AUTOPREF_OFF
2113 const struct tune_params arm_cortex_a15_tune
=
2115 &cortexa15_extra_costs
,
2116 &generic_addr_mode_costs
, /* Addressing mode costs. */
2117 NULL
, /* Sched adj cost. */
2118 arm_default_branch_cost
,
2119 &arm_default_vec_cost
,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL
,
2125 tune_params::PREF_CONST_POOL_FALSE
,
2126 tune_params::PREF_LDRD_TRUE
,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL
,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2131 tune_params::FUSE_NOTHING
,
2132 tune_params::SCHED_AUTOPREF_FULL
2135 const struct tune_params arm_cortex_a35_tune
=
2137 &cortexa53_extra_costs
,
2138 &generic_addr_mode_costs
, /* Addressing mode costs. */
2139 NULL
, /* Sched adj cost. */
2140 arm_default_branch_cost
,
2141 &arm_default_vec_cost
,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL
,
2147 tune_params::PREF_CONST_POOL_FALSE
,
2148 tune_params::PREF_LDRD_FALSE
,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER
,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2154 tune_params::SCHED_AUTOPREF_OFF
2157 const struct tune_params arm_cortex_a53_tune
=
2159 &cortexa53_extra_costs
,
2160 &generic_addr_mode_costs
, /* Addressing mode costs. */
2161 NULL
, /* Sched adj cost. */
2162 arm_default_branch_cost
,
2163 &arm_default_vec_cost
,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL
,
2169 tune_params::PREF_CONST_POOL_FALSE
,
2170 tune_params::PREF_LDRD_FALSE
,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER
,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2176 tune_params::SCHED_AUTOPREF_OFF
2179 const struct tune_params arm_cortex_a57_tune
=
2181 &cortexa57_extra_costs
,
2182 &generic_addr_mode_costs
, /* addressing mode costs */
2183 NULL
, /* Sched adj cost. */
2184 arm_default_branch_cost
,
2185 &arm_default_vec_cost
,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL
,
2191 tune_params::PREF_CONST_POOL_FALSE
,
2192 tune_params::PREF_LDRD_TRUE
,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL
,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2198 tune_params::SCHED_AUTOPREF_FULL
2201 const struct tune_params arm_exynosm1_tune
=
2203 &exynosm1_extra_costs
,
2204 &generic_addr_mode_costs
, /* Addressing mode costs. */
2205 NULL
, /* Sched adj cost. */
2206 arm_default_branch_cost
,
2207 &arm_default_vec_cost
,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL
,
2213 tune_params::PREF_CONST_POOL_FALSE
,
2214 tune_params::PREF_LDRD_TRUE
,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL
,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2219 tune_params::FUSE_NOTHING
,
2220 tune_params::SCHED_AUTOPREF_OFF
2223 const struct tune_params arm_xgene1_tune
=
2225 &xgene1_extra_costs
,
2226 &generic_addr_mode_costs
, /* Addressing mode costs. */
2227 NULL
, /* Sched adj cost. */
2228 arm_default_branch_cost
,
2229 &arm_default_vec_cost
,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL
,
2235 tune_params::PREF_CONST_POOL_FALSE
,
2236 tune_params::PREF_LDRD_TRUE
,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL
,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2241 tune_params::FUSE_NOTHING
,
2242 tune_params::SCHED_AUTOPREF_OFF
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2248 const struct tune_params arm_cortex_a5_tune
=
2250 &cortexa5_extra_costs
,
2251 &generic_addr_mode_costs
, /* Addressing mode costs. */
2252 NULL
, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost
,
2254 &arm_default_vec_cost
,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL
,
2260 tune_params::PREF_CONST_POOL_FALSE
,
2261 tune_params::PREF_LDRD_FALSE
,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER
,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2266 tune_params::FUSE_NOTHING
,
2267 tune_params::SCHED_AUTOPREF_OFF
2270 const struct tune_params arm_cortex_a9_tune
=
2272 &cortexa9_extra_costs
,
2273 &generic_addr_mode_costs
, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost
,
2275 arm_default_branch_cost
,
2276 &arm_default_vec_cost
,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE
,
2283 tune_params::PREF_LDRD_FALSE
,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER
,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2288 tune_params::FUSE_NOTHING
,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_cortex_a12_tune
=
2294 &cortexa12_extra_costs
,
2295 &generic_addr_mode_costs
, /* Addressing mode costs. */
2296 NULL
, /* Sched adj cost. */
2297 arm_default_branch_cost
,
2298 &arm_default_vec_cost
, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL
,
2304 tune_params::PREF_CONST_POOL_FALSE
,
2305 tune_params::PREF_LDRD_TRUE
,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL
,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2311 tune_params::SCHED_AUTOPREF_OFF
2314 const struct tune_params arm_cortex_a73_tune
=
2316 &cortexa57_extra_costs
,
2317 &generic_addr_mode_costs
, /* Addressing mode costs. */
2318 NULL
, /* Sched adj cost. */
2319 arm_default_branch_cost
,
2320 &arm_default_vec_cost
, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL
,
2326 tune_params::PREF_CONST_POOL_FALSE
,
2327 tune_params::PREF_LDRD_TRUE
,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL
,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2333 tune_params::SCHED_AUTOPREF_FULL
2336 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2337 cycle to execute each. An LDR from the constant pool also takes two cycles
2338 to execute, but mildly increases pipelining opportunity (consecutive
2339 loads/stores can be pipelined together, saving one cycle), and may also
2340 improve icache utilisation. Hence we prefer the constant pool for such
2343 const struct tune_params arm_v7m_tune
=
2346 &generic_addr_mode_costs
, /* Addressing mode costs. */
2347 NULL
, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost
,
2349 &arm_default_vec_cost
,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL
,
2355 tune_params::PREF_CONST_POOL_TRUE
,
2356 tune_params::PREF_LDRD_FALSE
,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER
,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2361 tune_params::FUSE_NOTHING
,
2362 tune_params::SCHED_AUTOPREF_OFF
2365 /* Cortex-M7 tuning. */
2367 const struct tune_params arm_cortex_m7_tune
=
2370 &generic_addr_mode_costs
, /* Addressing mode costs. */
2371 NULL
, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost
,
2373 &arm_default_vec_cost
,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL
,
2379 tune_params::PREF_CONST_POOL_TRUE
,
2380 tune_params::PREF_LDRD_FALSE
,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER
,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2385 tune_params::FUSE_NOTHING
,
2386 tune_params::SCHED_AUTOPREF_OFF
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2392 const struct tune_params arm_v6m_tune
=
2394 &generic_extra_costs
, /* Insn extra costs. */
2395 &generic_addr_mode_costs
, /* Addressing mode costs. */
2396 NULL
, /* Sched adj cost. */
2397 arm_default_branch_cost
,
2398 &arm_default_vec_cost
, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL
,
2404 tune_params::PREF_CONST_POOL_FALSE
,
2405 tune_params::PREF_LDRD_FALSE
,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER
,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2410 tune_params::FUSE_NOTHING
,
2411 tune_params::SCHED_AUTOPREF_OFF
2414 const struct tune_params arm_fa726te_tune
=
2416 &generic_extra_costs
, /* Insn extra costs. */
2417 &generic_addr_mode_costs
, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost
,
2419 arm_default_branch_cost
,
2420 &arm_default_vec_cost
,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL
,
2426 tune_params::PREF_CONST_POOL_TRUE
,
2427 tune_params::PREF_LDRD_FALSE
,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER
,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2432 tune_params::FUSE_NOTHING
,
2433 tune_params::SCHED_AUTOPREF_OFF
2436 char *accepted_branch_protection_string
= NULL
;
2438 /* Auto-generated CPU, FPU and architecture tables. */
2439 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2447 /* Supported TLS relocations. */
2458 TLS_DESCSEQ
/* GNU scheme */
2461 /* The maximum number of insns to be used when loading a constant. */
2463 arm_constant_limit (bool size_p
)
2465 return size_p
? 1 : current_tune
->constant_limit
;
2468 /* Emit an insn that's a simple single-set. Both the operands must be known
2470 inline static rtx_insn
*
2471 emit_set_insn (rtx x
, rtx y
)
2473 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick: each
   iteration clears the least-significant set bit, so the loop runs once
   per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2491 /* Return the number of bits set in BMAP. */
2493 bitmap_popcount (const sbitmap bmap
)
2495 unsigned int count
= 0;
2497 sbitmap_iterator sbi
;
2499 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2508 } arm_fixed_mode_set
;
2510 /* A small helper for setting fixed-point library libfuncs. */
2513 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2514 const char *funcname
, const char *modename
,
2519 if (num_suffix
== 0)
2520 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2522 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2524 set_optab_libfunc (optable
, mode
, buffer
);
2528 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2529 machine_mode from
, const char *funcname
,
2530 const char *toname
, const char *fromname
)
2533 const char *maybe_suffix_2
= "";
2535 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2536 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2537 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2538 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2539 maybe_suffix_2
= "2";
2541 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2544 set_conv_libfunc (optable
, to
, from
, buffer
);
2547 static GTY(()) rtx speculation_barrier_libfunc
;
2549 /* Record that we have no arithmetic or comparison libfuncs for
2550 machine mode MODE. */
2553 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode
)
2556 set_optab_libfunc (add_optab
, mode
, NULL
);
2557 set_optab_libfunc (sdiv_optab
, mode
, NULL
);
2558 set_optab_libfunc (smul_optab
, mode
, NULL
);
2559 set_optab_libfunc (neg_optab
, mode
, NULL
);
2560 set_optab_libfunc (sub_optab
, mode
, NULL
);
2563 set_optab_libfunc (eq_optab
, mode
, NULL
);
2564 set_optab_libfunc (ne_optab
, mode
, NULL
);
2565 set_optab_libfunc (lt_optab
, mode
, NULL
);
2566 set_optab_libfunc (le_optab
, mode
, NULL
);
2567 set_optab_libfunc (ge_optab
, mode
, NULL
);
2568 set_optab_libfunc (gt_optab
, mode
, NULL
);
2569 set_optab_libfunc (unord_optab
, mode
, NULL
);
2572 /* Set up library functions unique to ARM. */
2574 arm_init_libfuncs (void)
2576 machine_mode mode_iter
;
2578 /* For Linux, we have access to kernel support for atomic operations. */
2579 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2580 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2582 /* There are no special library functions unless we are using the
2587 /* The functions below are described in Section 4 of the "Run-Time
2588 ABI for the ARM architecture", Version 1.0. */
2590 /* Double-precision floating-point arithmetic. Table 2. */
2591 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2592 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2593 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2594 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2595 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2597 /* Double-precision comparisons. Table 3. */
2598 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2599 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2600 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2601 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2602 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2603 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2604 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2606 /* Single-precision floating-point arithmetic. Table 4. */
2607 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2608 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2609 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2610 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2611 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2613 /* Single-precision comparisons. Table 5. */
2614 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2615 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2616 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2617 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2618 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2619 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2620 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2622 /* Floating-point to integer conversions. Table 6. */
2623 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2624 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2625 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2626 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2627 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2628 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2629 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2630 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2632 /* Conversions between floating types. Table 7. */
2633 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2634 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2636 /* Integer to floating-point conversions. Table 8. */
2637 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2638 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2639 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2640 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2641 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2642 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2643 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2644 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2646 /* Long long. Table 9. */
2647 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2648 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2649 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2650 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2651 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2652 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2653 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2654 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2656 /* Integer (32/32->32) division. \S 4.3.1. */
2657 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2658 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2660 /* The divmod functions are designed so that they can be used for
2661 plain division, even though they return both the quotient and the
2662 remainder. The quotient is returned in the usual location (i.e.,
2663 r0 for SImode, {r0, r1} for DImode), just as would be expected
2664 for an ordinary division routine. Because the AAPCS calling
2665 conventions specify that all of { r0, r1, r2, r3 } are
2666 callee-saved registers, there is no need to tell the compiler
2667 explicitly that those registers are clobbered by these
2669 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2670 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2672 /* For SImode division the ABI provides div-without-mod routines,
2673 which are faster. */
2674 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2675 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2677 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2678 divmod libcalls instead. */
2679 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2680 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2681 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2682 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2684 /* Half-precision float operations. The compiler handles all operations
2685 with NULL libfuncs by converting the SFmode. */
2686 switch (arm_fp16_format
)
2688 case ARM_FP16_FORMAT_IEEE
:
2689 case ARM_FP16_FORMAT_ALTERNATIVE
:
2692 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2693 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2695 : "__gnu_f2h_alternative"));
2696 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2697 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2699 : "__gnu_h2f_alternative"));
2701 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2702 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2704 : "__gnu_d2h_alternative"));
2706 arm_block_arith_comp_libfuncs_for_mode (HFmode
);
2713 /* For all possible libcalls in BFmode, record NULL. */
2714 FOR_EACH_MODE_IN_CLASS (mode_iter
, MODE_FLOAT
)
2716 set_conv_libfunc (trunc_optab
, BFmode
, mode_iter
, NULL
);
2717 set_conv_libfunc (trunc_optab
, mode_iter
, BFmode
, NULL
);
2718 set_conv_libfunc (sext_optab
, mode_iter
, BFmode
, NULL
);
2719 set_conv_libfunc (sext_optab
, BFmode
, mode_iter
, NULL
);
2721 arm_block_arith_comp_libfuncs_for_mode (BFmode
);
2723 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2725 const arm_fixed_mode_set fixed_arith_modes
[] =
2728 { E_UQQmode
, "uqq" },
2730 { E_UHQmode
, "uhq" },
2732 { E_USQmode
, "usq" },
2734 { E_UDQmode
, "udq" },
2736 { E_UTQmode
, "utq" },
2738 { E_UHAmode
, "uha" },
2740 { E_USAmode
, "usa" },
2742 { E_UDAmode
, "uda" },
2744 { E_UTAmode
, "uta" }
2746 const arm_fixed_mode_set fixed_conv_modes
[] =
2749 { E_UQQmode
, "uqq" },
2751 { E_UHQmode
, "uhq" },
2753 { E_USQmode
, "usq" },
2755 { E_UDQmode
, "udq" },
2757 { E_UTQmode
, "utq" },
2759 { E_UHAmode
, "uha" },
2761 { E_USAmode
, "usa" },
2763 { E_UDAmode
, "uda" },
2765 { E_UTAmode
, "uta" },
2776 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2778 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2779 "add", fixed_arith_modes
[i
].name
, 3);
2780 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2781 "ssadd", fixed_arith_modes
[i
].name
, 3);
2782 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2783 "usadd", fixed_arith_modes
[i
].name
, 3);
2784 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2785 "sub", fixed_arith_modes
[i
].name
, 3);
2786 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2787 "sssub", fixed_arith_modes
[i
].name
, 3);
2788 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2789 "ussub", fixed_arith_modes
[i
].name
, 3);
2790 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2791 "mul", fixed_arith_modes
[i
].name
, 3);
2792 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2793 "ssmul", fixed_arith_modes
[i
].name
, 3);
2794 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2795 "usmul", fixed_arith_modes
[i
].name
, 3);
2796 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2797 "div", fixed_arith_modes
[i
].name
, 3);
2798 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2799 "udiv", fixed_arith_modes
[i
].name
, 3);
2800 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2801 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2802 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2803 "usdiv", fixed_arith_modes
[i
].name
, 3);
2804 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2805 "neg", fixed_arith_modes
[i
].name
, 2);
2806 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2807 "ssneg", fixed_arith_modes
[i
].name
, 2);
2808 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2809 "usneg", fixed_arith_modes
[i
].name
, 2);
2810 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2811 "ashl", fixed_arith_modes
[i
].name
, 3);
2812 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2813 "ashr", fixed_arith_modes
[i
].name
, 3);
2814 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2815 "lshr", fixed_arith_modes
[i
].name
, 3);
2816 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2817 "ssashl", fixed_arith_modes
[i
].name
, 3);
2818 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2819 "usashl", fixed_arith_modes
[i
].name
, 3);
2820 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2821 "cmp", fixed_arith_modes
[i
].name
, 2);
2824 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2825 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2828 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2829 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2832 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2833 fixed_conv_modes
[j
].mode
, "fract",
2834 fixed_conv_modes
[i
].name
,
2835 fixed_conv_modes
[j
].name
);
2836 arm_set_fixed_conv_libfunc (satfract_optab
,
2837 fixed_conv_modes
[i
].mode
,
2838 fixed_conv_modes
[j
].mode
, "satfract",
2839 fixed_conv_modes
[i
].name
,
2840 fixed_conv_modes
[j
].name
);
2841 arm_set_fixed_conv_libfunc (fractuns_optab
,
2842 fixed_conv_modes
[i
].mode
,
2843 fixed_conv_modes
[j
].mode
, "fractuns",
2844 fixed_conv_modes
[i
].name
,
2845 fixed_conv_modes
[j
].name
);
2846 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2847 fixed_conv_modes
[i
].mode
,
2848 fixed_conv_modes
[j
].mode
, "satfractuns",
2849 fixed_conv_modes
[i
].name
,
2850 fixed_conv_modes
[j
].name
);
2854 if (TARGET_AAPCS_BASED
)
2855 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2857 speculation_barrier_libfunc
= init_one_libfunc ("__speculation_barrier");
2860 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2862 arm_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
2864 gcall
*stmt
= as_a
<gcall
*> (gsi_stmt (*gsi
));
2865 tree fndecl
= gimple_call_fndecl (stmt
);
2866 unsigned int code
= DECL_MD_FUNCTION_CODE (fndecl
);
2867 unsigned int subcode
= code
>> ARM_BUILTIN_SHIFT
;
2868 gimple
*new_stmt
= NULL
;
2869 switch (code
& ARM_BUILTIN_CLASS
)
2871 case ARM_BUILTIN_GENERAL
:
2873 case ARM_BUILTIN_MVE
:
2874 new_stmt
= arm_mve::gimple_fold_builtin (subcode
, stmt
);
2879 gsi_replace (gsi
, new_stmt
, true);
2883 /* On AAPCS systems, this is the "struct __va_list". */
2884 static GTY(()) tree va_list_type
;
2886 /* Return the type to use as __builtin_va_list. */
2888 arm_build_builtin_va_list (void)
2893 if (!TARGET_AAPCS_BASED
)
2894 return std_build_builtin_va_list ();
2896 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2904 The C Library ABI further reinforces this definition in \S
2907 We must follow this definition exactly. The structure tag
2908 name is visible in C++ mangled names, and thus forms a part
2909 of the ABI. The field name may be used by people who
2910 #include <stdarg.h>. */
2911 /* Create the type. */
2912 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2913 /* Give it the required name. */
2914 va_list_name
= build_decl (BUILTINS_LOCATION
,
2916 get_identifier ("__va_list"),
2918 DECL_ARTIFICIAL (va_list_name
) = 1;
2919 TYPE_NAME (va_list_type
) = va_list_name
;
2920 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2921 /* Create the __ap field. */
2922 ap_field
= build_decl (BUILTINS_LOCATION
,
2924 get_identifier ("__ap"),
2926 DECL_ARTIFICIAL (ap_field
) = 1;
2927 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2928 TYPE_FIELDS (va_list_type
) = ap_field
;
2929 /* Compute its layout. */
2930 layout_type (va_list_type
);
2932 return va_list_type
;
2935 /* Return an expression of type "void *" pointing to the next
2936 available argument in a variable-argument list. VALIST is the
2937 user-level va_list object, of type __builtin_va_list. */
2939 arm_extract_valist_ptr (tree valist
)
2941 if (TREE_TYPE (valist
) == error_mark_node
)
2942 return error_mark_node
;
2944 /* On an AAPCS target, the pointer is stored within "struct
2946 if (TARGET_AAPCS_BASED
)
2948 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2949 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2950 valist
, ap_field
, NULL_TREE
);
2956 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2958 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2960 valist
= arm_extract_valist_ptr (valist
);
2961 std_expand_builtin_va_start (valist
, nextarg
);
2964 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2966 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2969 valist
= arm_extract_valist_ptr (valist
);
2970 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2973 /* Check any incompatible options that the user has specified.  */
2975 arm_option_check_internal (struct gcc_options *opts)
/* Cache the target flag word being checked; OPTS may be the global
   options or a target-attribute override set.  */
2977 int flags = opts->x_target_flags;
2979 /* iWMMXt and NEON are incompatible.  */
/* NOTE(review): extraction gap — the first half of this condition
   (orig. 2980, presumably a TARGET_IWMMXT test) is missing here;
   confirm against upstream before relying on this text.  */
2981 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2982 error ("iWMMXt and NEON are incompatible");
2984 /* Make sure that the processor choice does not conflict with any of the
2985 other command line choices.  */
2986 if (TARGET_ARM_P (flags)
2987 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2988 error ("target CPU does not support ARM mode");
2990 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2991 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2992 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2994 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2995 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2997 /* If this target is normally configured to use APCS frames, warn if they
2998 are turned off and debugging is turned on.  */
2999 if (TARGET_ARM_P (flags)
3000 && write_symbols != NO_DEBUG
3001 && !TARGET_APCS_FRAME
3002 && (TARGET_DEFAULT & MASK_APCS_FRAME))
/* NOTE(review): the string continuation of this diagnostic (orig. 3004)
   is missing from this view.  */
3003 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3006 /* iWMMXt unsupported under Thumb mode.  */
3007 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3008 error ("iWMMXt unsupported under Thumb mode");
/* cp15 thread-pointer access needs full ARM/Thumb-2 MRC encodings.  */
3010 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3011 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3013 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3015 error ("RTP PIC is incompatible with Thumb");
/* Both -mpure-code and -mslow-flash-data forbid literal pools, so they
   share one set of restrictions; FLAG names whichever was given.  */
3019 if (target_pure_code || target_slow_flash_data)
3021 const char *flag = (target_pure_code ? "-mpure-code" :
3022 "-mslow-flash-data");
3023 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3025 /* We only support -mslow-flash-data on M-profile targets with
3027 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3028 error ("%s only supports non-pic code on M-profile targets with the "
3029 "MOVT instruction", flag);
3031 /* We only support -mpure-code on M-profile targets.  */
3032 if (target_pure_code && common_unsupported_modes)
3033 error ("%s only supports non-pic code on M-profile targets", flag);
3035 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3036 -mword-relocations forbids relocation of MOVT/MOVW.  */
3037 if (target_word_relocations)
3038 error ("%s incompatible with %<-mword-relocations%>", flag);
3042 /* Recompute the global settings depending on target attribute options.  */
/* NOTE(review): extraction gap — the return type (orig. 3044) and the
   leading TARGET_THUMB1 condition (orig. 3049-3050) are missing here.  */
3045 arm_option_params_internal (void)
3047 /* If we are not using the default (ARM mode) section anchor offset
3048 ranges, then set the correct ranges now.  */
3051 /* Thumb-1 LDR instructions cannot have negative offsets.
3052 Permissible positive offset ranges are 5-bit (for byte loads),
3053 6-bit (for halfword loads), or 7-bit (for word loads).
3054 Empirical results suggest a 7-bit anchor range gives the best
3055 overall code size.  */
3056 targetm.min_anchor_offset = 0;
3057 targetm.max_anchor_offset = 127;
3059 else if (TARGET_THUMB2)
3061 /* The minimum is set such that the total size of the block
3062 for a particular anchor is 248 + 1 + 4095 bytes, which is
3063 divisible by eight, ensuring natural spacing of anchors.  */
3064 targetm.min_anchor_offset = -248;
3065 targetm.max_anchor_offset = 4095;
/* ARM-state default anchor range.  */
3069 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3070 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3073 /* Increase the number of conditional instructions with -Os.  */
3074 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3076 /* For THUMB2, we limit the conditional sequence to one IT block.  */
3078 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
/* Select the inline-asm fixup hook matching the instruction set.  */
3081 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3083 targetm.md_asm_adjust = arm_md_asm_adjust;
3086 /* True if -mflip-thumb should next add an attribute for the default
3087 mode, false if it should next add an attribute for the opposite mode.  */
3088 static GTY(()) bool thumb_flipper;
3090 /* Options after initial target override.  */
/* Saved optimization node consulted by arm_option_override_internal so
   per-attribute recompilation can restore the user's original flags.  */
3091 static GTY(()) tree init_optimize;
/* Shared worker for targetm.override_options_after_change: re-derive
   options that depend on other options.  OPTS/OPTS_SET are the option
   set being adjusted and its explicit-set mask.
   NOTE(review): the return type line (orig. 3093) and braces are missing
   from this extraction.  */
3094 arm_override_options_after_change_1 (struct gcc_options *opts,
3095 struct gcc_options *opts_set)
3097 /* -falign-functions without argument: supply one.  */
/* Thumb code optimized for size aligns functions to 2 bytes, otherwise
   4; only applied when the user gave no explicit alignment string.  */
3098 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3099 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3100 && opts->x_optimize_size ? "2" : "4";
3103 /* Implement targetm.override_options_after_change.  */
/* Thin wrapper applying the worker above to the global option set.
   NOTE(review): return type/braces missing in this extraction.  */
3106 arm_override_options_after_change (void)
3108 arm_override_options_after_change_1 (&global_options, &global_options_set);
3111 /* Implement TARGET_OPTION_RESTORE.  */
/* Rebuild the active target description from a saved cl_target_option
   (PTR), then refresh the derived architecture globals.  The opts /
   opts_set parameters are unused, hence commented out.
   NOTE(review): return type/braces missing in this extraction.  */
3113 arm_option_restore (struct gcc_options */* opts */,
3114 struct gcc_options */* opts_set */,
3115 struct cl_target_option *ptr)
3117 arm_configure_build_target (&arm_active_target, ptr, false);
3118 arm_option_reconfigure_globals ();
3121 /* Reset options between modes that the user has specified.  */
/* NOTE(review): this extraction has dropped scattered lines (return type,
   braces, some else-arms); gaps are flagged below where evident.  */
3123 arm_option_override_internal (struct gcc_options *opts,
3124 struct gcc_options *opts_set)
3126 arm_override_options_after_change_1 (opts, opts_set);
/* Interworking needs Thumb in the ISA; silently drop it otherwise.  */
3128 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3130 /* The default is to enable interworking, so this warning message would
3131 be confusing to users who have just compiled with
3132 eg, -march=armv4.  */
3133 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3134 opts->x_target_flags &= ~MASK_INTERWORK;
3137 if (TARGET_THUMB_P (opts->x_target_flags)
3138 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3140 warning (0, "target CPU does not support THUMB instructions");
3141 opts->x_target_flags &= ~MASK_THUMB;
3144 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3146 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3147 opts->x_target_flags &= ~MASK_APCS_FRAME;
3150 /* Callee super interworking implies thumb interworking.  Adding
3151 this to the flags here simplifies the logic elsewhere.  */
3152 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3153 opts->x_target_flags |= MASK_INTERWORK;
3155 /* need to remember initial values so combinaisons of options like
3156 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3157 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
/* Default -mrestrict-it from the architecture when not user-set.  */
3159 if (! opts_set->x_arm_restrict_it)
3160 opts->x_arm_restrict_it = arm_arch8;
3162 /* ARM execution state and M profile don't have [restrict] IT.  */
3163 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3164 opts->x_arm_restrict_it = 0;
3166 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
3167 if (!opts_set->x_arm_restrict_it
3168 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3169 opts->x_arm_restrict_it = 0;
3171 /* Enable -munaligned-access by default for
3172 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3173 i.e. Thumb2 and ARM state only.
3174 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3175 - ARMv8 architecture-base processors.
3177 Disable -munaligned-access by default for
3178 - all pre-ARMv6 architecture-based processors
3179 - ARMv6-M architecture-based processors
3180 - ARMv8-M Baseline processors.  */
3182 if (! opts_set->x_unaligned_access)
3184 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3185 && arm_arch6 && (arm_arch_notm || arm_arch7));
3187 else if (opts->x_unaligned_access == 1
3188 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3190 warning (0, "target CPU does not support unaligned accesses");
3191 opts->x_unaligned_access = 0;
3194 /* Don't warn since it's on by default in -O2.  */
/* Thumb-1: the scheduler is disabled (see epilogue comment below);
   otherwise restore the user's original setting from INIT_OPTIMIZE.  */
3195 if (TARGET_THUMB1_P (opts->x_target_flags))
3196 opts->x_flag_schedule_insns = 0;
3198 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3200 /* Disable shrink-wrap when optimizing function for size, since it tends to
3201 generate additional returns.  */
3202 if (optimize_function_for_size_p (cfun)
3203 && TARGET_THUMB2_P (opts->x_target_flags))
3204 opts->x_flag_shrink_wrap = false;
3206 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3208 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3209 - epilogue_insns - does not accurately model the corresponding insns
3210 emitted in the asm file.  In particular, see the comment in thumb_exit
3211 'Find out how many of the (return) argument registers we can corrupt'.
3212 As a consequence, the epilogue may clobber registers without fipa-ra
3213 finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3214 TODO: Accurately model clobbers for epilogue_insns and reenable
3216 if (TARGET_THUMB1_P (opts->x_target_flags))
3217 opts->x_flag_ipa_ra = 0;
3219 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3221 /* Thumb2 inline assembly code should always use unified syntax.
3222 This will apply to ARM and Thumb1 eventually.  */
3223 if (TARGET_THUMB2_P (opts->x_target_flags))
3224 opts->x_inline_asm_unified = true;
/* A global stack-protector guard has no per-task offset, so the two
   options cannot be combined.  */
3226 if (arm_stack_protector_guard == SSP_GLOBAL
3227 && opts->x_arm_stack_protector_guard_offset_str)
3229 error ("incompatible options %<-mstack-protector-guard=global%> and "
3230 "%<-mstack-protector-guard-offset=%s%>",
3231 arm_stack_protector_guard_offset_str);
3234 if (opts->x_arm_stack_protector_guard_offset_str)
/* Parse the offset with strtol; reject empty input, trailing junk,
   or range errors reported through errno.
   NOTE(review): the errno = 0 reset and 'char *end' declaration
   (orig. ~3236-3238) are missing from this extraction.  */
3237 const char *str = arm_stack_protector_guard_offset_str;
3239 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3240 if (!*str || *end || errno)
3241 error ("%qs is not a valid offset in %qs", str,
3242 "-mstack-protector-guard-offset=");
3243 arm_stack_protector_guard_offset = offs;
/* PAC return-address signing needs Armv8-M Mainline and is mutually
   exclusive with the TPCS frame layout.  */
3246 if (arm_current_function_pac_enabled_p ())
3248 if (!arm_arch8m_main)
3249 error ("This architecture does not support branch protection "
3251 if (TARGET_TPCS_FRAME)
3252 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3255 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3256 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
/* Cached feature-bit masks, allocated once in arm_option_override and
   used by arm_configure_build_target:
   - isa_all_fpubits_internal: every bit settable via -mfpu;
   - isa_all_fpbits: all FP-related bits (incl. MVE float);
   - isa_quirkbits: quirk bits ignored when diffing -mcpu vs -march.  */
3260 static sbitmap isa_all_fpubits_internal;
3261 static sbitmap isa_all_fpbits;
3262 static sbitmap isa_quirkbits;
3264 /* Configure a build target TARGET from the user-specified options OPTS and
3265 OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3266 architecture have been specified, but the two are not identical.  */
/* NOTE(review): this extraction has dropped scattered lines (return type,
   braces, some arguments and else-arms); gaps flagged below.  */
3268 arm_configure_build_target (struct arm_build_target *target,
3269 struct cl_target_option *opts,
3270 bool warn_compatible)
3272 const cpu_option *arm_selected_tune = NULL;
3273 const arch_option *arm_selected_arch = NULL;
3274 const cpu_option *arm_selected_cpu = NULL;
3275 const arm_fpu_desc *arm_selected_fpu = NULL;
3276 const char *tune_opts = NULL;
3277 const char *arch_opts = NULL;
3278 const char *cpu_opts = NULL;
/* Start from a clean target description.  */
3280 bitmap_clear (target->isa);
3281 target->core_name = NULL;
3282 target->arch_name = NULL;
/* Parse -march; any '+feature' suffixes are split off into arch_opts.  */
3284 if (opts->x_arm_arch_string)
3286 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3288 opts->x_arm_arch_string);
3289 arch_opts = strchr (opts->x_arm_arch_string, '+');
/* Parse -mcpu similarly; -mcpu also supplies the default tuning.  */
3292 if (opts->x_arm_cpu_string)
3294 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3295 opts->x_arm_cpu_string);
3296 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3297 arm_selected_tune = arm_selected_cpu;
3298 /* If taking the tuning from -mcpu, we don't need to rescan the
3299 options for tuning.  */
/* Parse -mtune; overrides any tuning taken from -mcpu.  */
3302 if (opts->x_arm_tune_string)
3304 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3305 opts->x_arm_tune_string);
3306 tune_opts = strchr (opts->x_arm_tune_string, '+');
/* Validate -mbranch-protection; only the A key is supported on ARM.  */
3309 if (opts->x_arm_branch_protection_string)
3311 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3313 if (aarch_ra_sign_key != AARCH_KEY_A)
3315 warning (0, "invalid key type for %<-mbranch-protection=%>");
3316 aarch_ra_sign_key = AARCH_KEY_A;
/* Case 1: -march given (possibly with -mcpu).  */
3320 if (arm_selected_arch)
3322 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_arch->common,
/* Both -march and -mcpu: diff the two feature sets to detect conflicts.  */
3326 if (arm_selected_cpu)
3328 auto_sbitmap cpu_isa (isa_num_bits);
3329 auto_sbitmap isa_delta (isa_num_bits);
3331 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3332 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3334 bitmap_xor (isa_delta, cpu_isa, target->isa);
3335 /* Ignore any bits that are quirk bits.  */
3336 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3337 /* If the user (or the default configuration) has specified a
3338 specific FPU, then ignore any bits that depend on the FPU
3339 configuration.  Do similarly if using the soft-float
3341 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3342 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3343 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
/* Non-empty delta means the named CPU does not implement the named
   architecture (modulo quirks/FPU bits): warn and let -march win.  */
3345 if (!bitmap_empty_p (isa_delta))
3347 if (warn_compatible)
3348 warning (0, "switch %<-mcpu=%s%> conflicts "
3349 "with switch %<-march=%s%>",
3350 opts->x_arm_cpu_string,
3351 opts->x_arm_arch_string);
3353 /* -march wins for code generation.
3354 -mcpu wins for default tuning.  */
3355 if (!arm_selected_tune)
3356 arm_selected_tune = arm_selected_cpu;
3358 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3359 target->arch_name = arm_selected_arch->common.name;
3363 /* Architecture and CPU are essentially the same.
3364 Prefer the CPU setting.  */
3365 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3366 target->core_name = arm_selected_cpu->common.name;
3367 /* Copy the CPU's capabilities, so that we inherit the
3368 appropriate extensions and quirks.  */
3369 bitmap_copy (target->isa, cpu_isa);
3374 /* Pick a CPU based on the architecture.  */
3375 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3376 target->arch_name = arm_selected_arch->common.name;
3377 /* Note: target->core_name is left unset in this path.  */
/* Case 2: only -mcpu given — derive the architecture from the CPU.  */
3380 else if (arm_selected_cpu)
3382 target->core_name = arm_selected_cpu->common.name;
3383 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3384 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3386 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3388 /* If the user did not specify a processor or architecture, choose
/* Case 3: neither given — start from the configured default CPU.  */
3392 const cpu_option *sel;
3393 auto_sbitmap sought_isa (isa_num_bits);
3394 bitmap_clear (sought_isa);
3395 auto_sbitmap default_isa (isa_num_bits);
3397 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3398 TARGET_CPU_DEFAULT);
3399 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3400 gcc_assert (arm_selected_cpu->common.name);
3402 /* RWE: All of the selection logic below (to the end of this
3403 'if' clause) looks somewhat suspect.  It appears to be mostly
3404 there to support forcing thumb support when the default CPU
3405 does not have thumb (somewhat dubious in terms of what the
3406 user might be expecting).  I think it should be removed once
3407 support for the pre-thumb era cores is removed.  */
3408 sel = arm_selected_cpu;
3409 arm_initialize_isa (default_isa, sel->common.isa_bits);
3410 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3413 /* Now check to see if the user has specified any command line
3414 switches that require certain abilities from the cpu.  */
3416 if (TARGET_INTERWORK || TARGET_THUMB)
3417 bitmap_set_bit (sought_isa, isa_bit_thumb);
3419 /* If there are such requirements and the default CPU does not
3420 satisfy them, we need to run over the complete list of
3421 cores looking for one that is satisfactory.  */
3422 if (!bitmap_empty_p (sought_isa)
3423 && !bitmap_subset_p (sought_isa, default_isa))
3425 auto_sbitmap candidate_isa (isa_num_bits);
3426 /* We're only interested in a CPU with at least the
3427 capabilities of the default CPU and the required
3428 additional features.  */
3429 bitmap_ior (default_isa, default_isa, sought_isa);
3431 /* Try to locate a CPU type that supports all of the abilities
3432 of the default CPU, plus the extra abilities requested by
3434 for (sel = all_cores; sel->common.name != NULL; sel++)
3436 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3437 /* An exact match?  */
3438 if (bitmap_equal_p (default_isa, candidate_isa))
/* No exact match found: fall through to the best-fit scan below.  */
3442 if (sel->common.name == NULL)
3444 unsigned current_bit_count = isa_num_bits;
3445 const cpu_option *best_fit = NULL;
3447 /* Ideally we would like to issue an error message here
3448 saying that it was not possible to find a CPU compatible
3449 with the default CPU, but which also supports the command
3450 line options specified by the programmer, and so they
3451 ought to use the -mcpu=<name> command line option to
3452 override the default CPU type.
3454 If we cannot find a CPU that has exactly the
3455 characteristics of the default CPU and the given
3456 command line options we scan the array again looking
3457 for a best match.  The best match must have at least
3458 the capabilities of the perfect match.  */
3459 for (sel = all_cores; sel->common.name != NULL; sel++)
3461 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3463 if (bitmap_subset_p (default_isa, candidate_isa))
/* Count the candidate's surplus bits; fewest surplus wins.
   NOTE(review): the 'unsigned count' declaration and the
   best_fit = sel assignment (orig. ~3465, 3473) are missing
   from this extraction.  */
3467 bitmap_and_compl (candidate_isa, candidate_isa,
3469 count = bitmap_popcount (candidate_isa);
3471 if (count < current_bit_count)
3474 current_bit_count = count;
3478 gcc_assert (best_fit);
3482 arm_selected_cpu = sel;
3485 /* Now we know the CPU, we can finally initialize the target
3487 target->core_name = arm_selected_cpu->common.name;
3488 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3489 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3491 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
/* By now every path must have resolved both a CPU and an arch.  */
3494 gcc_assert (arm_selected_cpu);
3495 gcc_assert (arm_selected_arch);
/* An explicit -mfpu overrides the FPU bits implied by CPU/arch.  */
3497 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3499 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3500 auto_sbitmap fpu_bits (isa_num_bits);
3502 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3503 /* This should clear out ALL bits relating to the FPU/simd
3504 extensions, to avoid potentially invalid combinations later on
3505 that we can't match.  At present we only clear out those bits
3506 that can be set by -mfpu.  This should be fixed in GCC-12.  */
3507 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3508 bitmap_ior (target->isa, target->isa, fpu_bits);
3511 /* If we have the soft-float ABI, clear any feature bits relating to use of
3512 floating-point operations.  They'll just confuse things later on.  */
3513 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3514 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3516 /* There may be implied bits which we still need to enable.  These are
3517 non-named features which are needed to complete other sets of features,
3518 but cannot be enabled from arm-cpus.in due to being shared between
3519 multiple fgroups.  Each entry in all_implied_fbits is of the form
3520 ante -> cons, meaning that if the feature "ante" is enabled, we should
3521 implicitly enable "cons".  */
3522 const struct fbit_implication *impl = all_implied_fbits;
/* NOTE(review): the loop header walking all_implied_fbits (orig. ~3523-3524)
   is missing from this extraction.  */
3525 if (bitmap_bit_p (target->isa, impl->ante))
3526 bitmap_set_bit (target->isa, impl->cons);
/* Resolve tuning: default to the selected CPU, else validate -mtune
   feature suffixes (features on -mtune do not affect codegen).  */
3530 if (!arm_selected_tune)
3531 arm_selected_tune = arm_selected_cpu;
3532 else /* Validate the features passed to -mtune.  */
3533 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3535 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3537 /* Finish initializing the target structure.  */
3538 if (!target->arch_name)
3539 target->arch_name = arm_selected_arch->common.name;
3540 target->arch_pp_name = arm_selected_arch->arch;
3541 target->base_arch = arm_selected_arch->base_arch;
3542 target->profile = arm_selected_arch->profile;
3544 target->tune_flags = tune_data->tune_flags;
3545 target->tune = tune_data->tune;
3546 target->tune_core = tune_data->scheduler;
3549 /* Fix up any incompatible options that the user has specified.  */
/* Implements TARGET_OPTION_OVERRIDE: allocates the shared feature-bit
   masks, builds the active target, then applies global option fixups.
   NOTE(review): this extraction has dropped scattered lines (return type,
   braces, several conditions/else-arms); gaps flagged where evident.  */
3551 arm_option_override (void)
3553 static const enum isa_feature fpu_bitlist_internal[]
3554 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3555 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
3556 static const enum isa_feature fp_bitlist[]
3557 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3558 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
3559 cl_target_option opts;
/* One-time allocation of the file-scope masks used when diffing and
   filtering feature sets in arm_configure_build_target.  */
3561 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3562 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3564 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3565 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3566 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3567 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3569 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
/* If no -mfpu was given, resolve the configured default FPU name.
   NOTE(review): the declarations of ok/fpu_index and the gcc_assert
   (orig. ~3572-3578) are missing from this extraction.  */
3571 if (!OPTION_SET_P (arm_fpu_index))
3576 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3579 arm_fpu_index = (enum fpu_type) fpu_index;
/* Build the active target from the current global options.  */
3582 cl_target_option_save (&opts, &global_options, &global_options_set);
3583 arm_configure_build_target (&arm_active_target, &opts, true);
3585 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3586 SUBTARGET_OVERRIDE_OPTIONS;
3589 /* Initialize boolean versions of the architectural flags, for use
3590 in the arm.md file and for enabling feature flags.  */
3591 arm_option_reconfigure_globals ();
3593 arm_tune = arm_active_target.tune_core;
3594 tune_flags = arm_active_target.tune_flags;
3595 current_tune = arm_active_target.tune;
3597 /* TBD: Dwarf info for apcs frame is not handled yet.  */
3598 if (TARGET_APCS_FRAME)
3599 flag_shrink_wrap = false;
3601 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3603 warning (0, "%<-mapcs-stack-check%> incompatible with "
3604 "%<-mno-apcs-frame%>");
3605 target_flags |= MASK_APCS_FRAME;
3608 if (TARGET_POKE_FUNCTION_NAME)
3609 target_flags |= MASK_APCS_FRAME;
3611 if (TARGET_APCS_REENT && flag_pic)
3612 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3614 if (TARGET_APCS_REENT)
3615 warning (0, "APCS reentrant code not supported.  Ignored");
3617 /* Set up some tuning parameters.  */
3618 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3619 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3620 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3621 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3622 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3623 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3625 /* For arm2/3 there is no need to do any scheduling if we are doing
3626 software floating-point.  */
3627 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3628 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3630 /* Override the default structure alignment for AAPCS ABI.  */
3631 if (!OPTION_SET_P (arm_structure_size_boundary))
3633 if (TARGET_AAPCS_BASED)
3634 arm_structure_size_boundary = 8;
/* Explicit -mstructure-size-boundary: deprecated, and only 8/32
   (plus 64 with doubleword alignment) are accepted.  */
3638 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3640 if (arm_structure_size_boundary != 8
3641 && arm_structure_size_boundary != 32
3642 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3644 if (ARM_DOUBLEWORD_ALIGN)
3646 "structure size boundary can only be set to 8, 32 or 64");
3648 warning (0, "structure size boundary can only be set to 8 or 32");
3649 arm_structure_size_boundary
3650 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
/* VxWorks RTP: data is not text-relative, so default to a single
   read-only PIC base register unless the user said otherwise.  */
3654 if (TARGET_VXWORKS_RTP)
3656 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3657 arm_pic_data_is_text_relative = 0;
3660 && !arm_pic_data_is_text_relative
3661 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3662 /* When text & data segments don't have a fixed displacement, the
3663 intended use is with a single, read only, pic base register.
3664 Unless the user explicitly requested not to do that, set
3666 target_flags |= MASK_SINGLE_PIC_BASE;
3668 /* If stack checking is disabled, we can use r10 as the PIC register,
3669 which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3670 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3672 if (TARGET_VXWORKS_RTP)
3673 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3674 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3677 if (flag_pic && TARGET_VXWORKS_RTP)
3678 arm_pic_register = 9;
3680 /* If in FDPIC mode then force arm_pic_register to be r9.  */
3683 arm_pic_register = FDPIC_REGNUM;
3685 sorry ("FDPIC mode is not supported in Thumb-1 mode");
/* Validate an explicit -mpic-register= choice.  */
3688 if (arm_pic_register_string != NULL)
3690 int pic_register = decode_reg_name (arm_pic_register_string);
3693 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3695 /* Prevent the user from choosing an obviously stupid PIC register.  */
3696 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3697 || pic_register == HARD_FRAME_POINTER_REGNUM
3698 || pic_register == STACK_POINTER_REGNUM
3699 || pic_register >= PC_REGNUM
3700 || (TARGET_VXWORKS_RTP
3701 && (unsigned int) pic_register != arm_pic_register))
3702 error ("unable to use %qs for PIC register", arm_pic_register_string);
3704 arm_pic_register = pic_register;
/* NOTE(review): the condition guarding this assignment (orig. ~3706-3707,
   presumably the literal-pool/word-relocation interaction) is missing
   from this extraction.  */
3708 target_word_relocations = 1;
3710 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
/* Value 2 means "unset by the user" for these tri-state quirk flags.  */
3711 if (fix_cm3_ldrd == 2)
3713 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3719 /* Enable fix_vlldm by default if required.  */
3722 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3728 /* Enable fix_aes by default if required.  */
3729 if (fix_aes_erratum_1742098 == 2)
3731 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3732 fix_aes_erratum_1742098 = 1;
3734 fix_aes_erratum_1742098 = 0;
3737 /* Hot/Cold partitioning is not currently supported, since we can't
3738 handle literal pool placement in that case.  */
3739 if (flag_reorder_blocks_and_partition)
3741 inform (input_location,
3742 "%<-freorder-blocks-and-partition%> not supported "
3743 "on this architecture");
3744 flag_reorder_blocks_and_partition = 0;
3745 flag_reorder_blocks = 1;
3749 /* Hoisting PIC address calculations more aggressively provides a small,
3750 but measurable, size reduction for PIC code.  Therefore, we decrease
3751 the bar for unrestricted expression hoisting to the cost of PIC address
3752 calculation, which is 2 instructions.  */
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_gcse_unrestricted_cost, 2);
3756 /* ARM EABI defaults to strict volatile bitfields.  */
3757 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3758 && abi_version_at_least(2))
3759 flag_strict_volatile_bitfields = 1;
3761 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3762 have deemed it beneficial (signified by setting
3763 prefetch.num_slots to 1 or more).  */
3764 if (flag_prefetch_loop_arrays < 0
3767 && current_tune->prefetch.num_slots > 0)
3768 flag_prefetch_loop_arrays = 1;
3770 /* Set up parameters to be used in prefetching algorithm.  Do not
3771 override the defaults unless we are tuning for a core we have
3772 researched values for.  */
3773 if (current_tune->prefetch.num_slots > 0)
3774 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3775 param_simultaneous_prefetches,
3776 current_tune->prefetch.num_slots);
3777 if (current_tune->prefetch.l1_cache_line_size >= 0)
3778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3779 param_l1_cache_line_size,
3780 current_tune->prefetch.l1_cache_line_size);
3781 if (current_tune->prefetch.l1_cache_line_size >= 0)
3783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3784 param_destruct_interfere_size,
3785 current_tune->prefetch.l1_cache_line_size);
3786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3787 param_construct_interfere_size,
3788 current_tune->prefetch.l1_cache_line_size);
3792 /* For a generic ARM target, JF Bastien proposed using 64 for both.  */
3793 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3795 /* More recent Cortex chips have a 64-byte cache line, but are marked
3796 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults.  */
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_destruct_interfere_size, 64);
3799 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3800 param_construct_interfere_size, 64);
3803 if (current_tune->prefetch.l1_cache_size >= 0)
3804 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3805 param_l1_cache_size,
3806 current_tune->prefetch.l1_cache_size);
3808 /* Look through ready list and all of queue for instructions
3809 relevant for L2 auto-prefetcher.  */
3810 int sched_autopref_queue_depth;
3812 switch (current_tune->sched_autopref)
3814 case tune_params::SCHED_AUTOPREF_OFF:
3815 sched_autopref_queue_depth = -1;
3818 case tune_params::SCHED_AUTOPREF_RANK:
3819 sched_autopref_queue_depth = 0;
3822 case tune_params::SCHED_AUTOPREF_FULL:
3823 sched_autopref_queue_depth = max_insn_queue_index + 1;
/* NOTE(review): the break statements and default: gcc_unreachable arm
   of this switch (orig. ~3816-3828) are missing from this extraction.  */
3830 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3831 param_sched_autopref_queue_depth,
3832 sched_autopref_queue_depth);
3834 /* Currently, for slow flash data, we just disable literal pools.  We also
3835 disable it for pure-code.  */
3836 if (target_slow_flash_data || target_pure_code)
3837 arm_disable_literal_pool = true;
3839 /* Disable scheduling fusion by default if it's not armv7 processor
3840 or doesn't prefer ldrd/strd.  */
3841 if (flag_schedule_fusion == 2
3842 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3843 flag_schedule_fusion = 0;
3845 /* Need to remember initial options before they are overriden.  */
3846 init_optimize = build_optimization_node (&global_options,
3847 &global_options_set);
/* Final sanity checks and per-mode overrides, then freeze the default
   target option node used by target attributes/pragmas.  */
3849 arm_options_perform_arch_sanity_checks ();
3850 arm_option_override_internal (&global_options, &global_options_set);
3851 arm_option_check_internal (&global_options);
3852 arm_option_params_internal ();
3854 /* Create the default target_options structure.  */
3855 target_option_default_node = target_option_current_node
3856 = build_target_option_node (&global_options, &global_options_set);
3858 /* Register global variables with the garbage collector.  */
3859 arm_add_gc_roots ();
3861 /* Init initial mode for testing.  */
3862 thumb_flipper = TARGET_THUMB;
3866 /* Reconfigure global status flags from the active_target.isa. */
3868 arm_option_reconfigure_globals (void)
3870 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3871 arm_base_arch
= arm_active_target
.base_arch
;
3873 /* Initialize boolean versions of the architectural flags, for use
3874 in the arm.md file. */
3875 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3876 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3877 arm_arch5t
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5t
);
3878 arm_arch5te
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5te
);
3879 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3880 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3881 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3882 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3883 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3884 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3885 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3886 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3887 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3888 arm_arch8_3
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_3
);
3889 arm_arch8_4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_4
);
3890 arm_arch8_1m_main
= bitmap_bit_p (arm_active_target
.isa
,
3891 isa_bit_armv8_1m_main
);
3892 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3893 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3894 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3895 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3896 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3897 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3898 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3899 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3900 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3901 arm_arch8m_main
= arm_arch7
&& arm_arch_cmse
;
3902 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3903 arm_arch_i8mm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_i8mm
);
3904 arm_arch_bf16
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_bf16
);
3906 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3909 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3910 error ("selected fp16 options are incompatible");
3911 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3915 arm_arch_cde_coproc
= 0;
3916 int cde_bits
[] = {isa_bit_cdecp0
, isa_bit_cdecp1
, isa_bit_cdecp2
,
3917 isa_bit_cdecp3
, isa_bit_cdecp4
, isa_bit_cdecp5
,
3918 isa_bit_cdecp6
, isa_bit_cdecp7
};
3919 for (int i
= 0, e
= ARRAY_SIZE (cde_bits
); i
< e
; i
++)
3921 int cde_bit
= bitmap_bit_p (arm_active_target
.isa
, cde_bits
[i
]);
3924 arm_arch_cde
|= cde_bit
;
3925 arm_arch_cde_coproc
|= arm_arch_cde_coproc_bits
[i
];
3929 /* And finally, set up some quirks. */
3930 arm_arch_no_volatile_ce
3931 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3932 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3933 isa_bit_quirk_armv6kz
);
3935 /* Use the cp15 method if it is available. */
3936 if (target_thread_pointer
== TP_AUTO
)
3938 if (arm_arch6k
&& !TARGET_THUMB1
)
3939 target_thread_pointer
= TP_TPIDRURO
;
3941 target_thread_pointer
= TP_SOFT
;
3944 if (!TARGET_HARD_TP
&& arm_stack_protector_guard
== SSP_TLSREG
)
3945 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3948 /* Perform some validation between the desired architecture and the rest of the
3951 arm_options_perform_arch_sanity_checks (void)
3953 /* V5T code we generate is completely interworking capable, so we turn off
3954 TARGET_INTERWORK here to avoid many tests later on. */
3956 /* XXX However, we must pass the right pre-processor defines to CPP
3957 or GLD can get confused. This is a hack. */
3958 if (TARGET_INTERWORK
)
3959 arm_cpp_interwork
= 1;
3962 target_flags
&= ~MASK_INTERWORK
;
3964 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3965 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3967 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3968 error ("iwmmxt abi requires an iwmmxt capable cpu");
3970 /* BPABI targets use linker tricks to allow interworking on cores
3971 without thumb support. */
3972 if (TARGET_INTERWORK
3974 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3976 warning (0, "target CPU does not support interworking" );
3977 target_flags
&= ~MASK_INTERWORK
;
3980 /* If soft-float is specified then don't use FPU. */
3981 if (TARGET_SOFT_FLOAT
)
3982 arm_fpu_attr
= FPU_NONE
;
3984 arm_fpu_attr
= FPU_VFP
;
3986 if (TARGET_AAPCS_BASED
)
3988 if (TARGET_CALLER_INTERWORKING
)
3989 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3991 if (TARGET_CALLEE_INTERWORKING
)
3992 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3995 /* __fp16 support currently assumes the core has ldrh. */
3996 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3997 sorry ("%<__fp16%> and no ldrh");
3999 if (use_cmse
&& !arm_arch_cmse
)
4000 error ("target CPU does not support ARMv8-M Security Extensions");
4002 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
4003 and ARMv8-M Baseline and Mainline do not allow such configuration. */
4004 if (use_cmse
&& TARGET_HARD_FLOAT
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
4005 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4008 if (TARGET_AAPCS_BASED
)
4010 if (arm_abi
== ARM_ABI_IWMMXT
)
4011 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
4012 else if (TARGET_HARD_FLOAT_ABI
)
4014 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
4015 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
)
4016 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_mve
))
4017 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4020 arm_pcs_default
= ARM_PCS_AAPCS
;
4024 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
4025 sorry ("%<-mfloat-abi=hard%> and VFP");
4027 if (arm_abi
== ARM_ABI_APCS
)
4028 arm_pcs_default
= ARM_PCS_APCS
;
4030 arm_pcs_default
= ARM_PCS_ATPCS
;
4034 /* Test whether a local function descriptor is canonical, i.e.,
4035 whether we can use GOTOFFFUNCDESC to compute the address of the
4038 arm_fdpic_local_funcdesc_p (rtx fnx
)
4041 enum symbol_visibility vis
;
4047 if (! SYMBOL_REF_LOCAL_P (fnx
))
4050 fn
= SYMBOL_REF_DECL (fnx
);
4055 vis
= DECL_VISIBILITY (fn
);
4057 if (vis
== VISIBILITY_PROTECTED
)
4058 /* Private function descriptors for protected functions are not
4059 canonical. Temporarily change the visibility to global so that
4060 we can ensure uniqueness of funcdesc pointers. */
4061 DECL_VISIBILITY (fn
) = VISIBILITY_DEFAULT
;
4063 ret
= default_binds_local_p_1 (fn
, flag_pic
);
4065 DECL_VISIBILITY (fn
) = vis
;
4071 arm_add_gc_roots (void)
4073 gcc_obstack_init(&minipool_obstack
);
4074 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
4077 /* A table of known ARM exception types.
4078 For use with the interrupt function attribute. */
4082 const char *const arg
;
4083 const unsigned long return_value
;
4087 static const isr_attribute_arg isr_attribute_args
[] =
4089 { "IRQ", ARM_FT_ISR
},
4090 { "irq", ARM_FT_ISR
},
4091 { "FIQ", ARM_FT_FIQ
},
4092 { "fiq", ARM_FT_FIQ
},
4093 { "ABORT", ARM_FT_ISR
},
4094 { "abort", ARM_FT_ISR
},
4095 { "UNDEF", ARM_FT_EXCEPTION
},
4096 { "undef", ARM_FT_EXCEPTION
},
4097 { "SWI", ARM_FT_EXCEPTION
},
4098 { "swi", ARM_FT_EXCEPTION
},
4099 { NULL
, ARM_FT_NORMAL
}
4102 /* Returns the (interrupt) function type of the current
4103 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4105 static unsigned long
4106 arm_isr_value (tree argument
)
4108 const isr_attribute_arg
* ptr
;
4112 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
4114 /* No argument - default to IRQ. */
4115 if (argument
== NULL_TREE
)
4118 /* Get the value of the argument. */
4119 if (TREE_VALUE (argument
) == NULL_TREE
4120 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
4121 return ARM_FT_UNKNOWN
;
4123 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
4125 /* Check it against the list of known arguments. */
4126 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4127 if (streq (arg
, ptr
->arg
))
4128 return ptr
->return_value
;
4130 /* An unrecognized interrupt type. */
4131 return ARM_FT_UNKNOWN
;
4134 /* Computes the type of the current function. */
4136 static unsigned long
4137 arm_compute_func_type (void)
4139 unsigned long type
= ARM_FT_UNKNOWN
;
4143 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
4145 /* Decide if the current function is volatile. Such functions
4146 never return, and many memory cycles can be saved by not storing
4147 register values that will never be needed again. This optimization
4148 was added to speed up context switching in a kernel application. */
4150 && (TREE_NOTHROW (current_function_decl
)
4151 || !(flag_unwind_tables
4153 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
4154 && TREE_THIS_VOLATILE (current_function_decl
))
4155 type
|= ARM_FT_VOLATILE
;
4157 if (cfun
->static_chain_decl
!= NULL
)
4158 type
|= ARM_FT_NESTED
;
4160 attr
= DECL_ATTRIBUTES (current_function_decl
);
4162 a
= lookup_attribute ("naked", attr
);
4164 type
|= ARM_FT_NAKED
;
4166 a
= lookup_attribute ("isr", attr
);
4168 a
= lookup_attribute ("interrupt", attr
);
4171 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
4173 type
|= arm_isr_value (TREE_VALUE (a
));
4175 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
4176 type
|= ARM_FT_CMSE_ENTRY
;
4181 /* Returns the type of the current function. */
4184 arm_current_func_type (void)
4186 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
4187 cfun
->machine
->func_type
= arm_compute_func_type ();
4189 return cfun
->machine
->func_type
;
4193 arm_allocate_stack_slots_for_args (void)
4195 /* Naked functions should not allocate stack slots for arguments. */
4196 return !IS_NAKED (arm_current_func_type ());
4200 arm_warn_func_return (tree decl
)
4202 /* Naked functions are implemented entirely in assembly, including the
4203 return sequence, so suppress warnings about this. */
4204 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
4208 /* Output assembler code for a block containing the constant parts
4209 of a trampoline, leaving space for the variable parts.
4211 On the ARM, (if r8 is the static chain regnum, and remembering that
4212 referencing pc adds an offset of 8) the trampoline looks like:
4215 .word static chain value
4216 .word function's address
4217 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4219 In FDPIC mode, the trampoline looks like:
4220 .word trampoline address
4221 .word trampoline GOT address
4222 ldr r12, [pc, #8] ; #4 for Arm mode
4223 ldr r9, [pc, #8] ; #4 for Arm mode
4224 ldr pc, [pc, #8] ; #4 for Arm mode
4225 .word static chain value
4227 .word function's address
4231 arm_asm_trampoline_template (FILE *f
)
4233 fprintf (f
, "\t.syntax unified\n");
4237 /* The first two words are a function descriptor pointing to the
4238 trampoline code just below. */
4240 fprintf (f
, "\t.arm\n");
4241 else if (TARGET_THUMB2
)
4242 fprintf (f
, "\t.thumb\n");
4244 /* Only ARM and Thumb-2 are supported. */
4247 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4248 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4249 /* Trampoline code which sets the static chain register but also
4250 PIC register before jumping into real code. */
4251 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4252 STATIC_CHAIN_REGNUM
, PC_REGNUM
,
4253 TARGET_THUMB2
? 8 : 4);
4254 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4255 PIC_OFFSET_TABLE_REGNUM
, PC_REGNUM
,
4256 TARGET_THUMB2
? 8 : 4);
4257 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4258 PC_REGNUM
, PC_REGNUM
,
4259 TARGET_THUMB2
? 8 : 4);
4260 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4262 else if (TARGET_ARM
)
4264 fprintf (f
, "\t.arm\n");
4265 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4266 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
4268 else if (TARGET_THUMB2
)
4270 fprintf (f
, "\t.thumb\n");
4271 /* The Thumb-2 trampoline is similar to the arm implementation.
4272 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4273 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
4274 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4275 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
4279 ASM_OUTPUT_ALIGN (f
, 2);
4280 fprintf (f
, "\t.code\t16\n");
4281 fprintf (f
, ".Ltrampoline_start:\n");
4282 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
4283 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4284 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
4285 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4286 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
4287 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
4289 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4290 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4293 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4296 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4298 rtx fnaddr
, mem
, a_tramp
;
4300 emit_block_move (m_tramp
, assemble_trampoline_template (),
4301 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
4305 rtx funcdesc
= XEXP (DECL_RTL (fndecl
), 0);
4306 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
4307 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
4308 /* The function start address is at offset 8, but in Thumb mode
4309 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4311 rtx trampoline_code_start
4312 = plus_constant (Pmode
, XEXP (m_tramp
, 0), TARGET_THUMB2
? 9 : 8);
4314 /* Write initial funcdesc which points to the trampoline. */
4315 mem
= adjust_address (m_tramp
, SImode
, 0);
4316 emit_move_insn (mem
, trampoline_code_start
);
4317 mem
= adjust_address (m_tramp
, SImode
, 4);
4318 emit_move_insn (mem
, gen_rtx_REG (Pmode
, PIC_OFFSET_TABLE_REGNUM
));
4319 /* Setup static chain. */
4320 mem
= adjust_address (m_tramp
, SImode
, 20);
4321 emit_move_insn (mem
, chain_value
);
4322 /* GOT + real function entry point. */
4323 mem
= adjust_address (m_tramp
, SImode
, 24);
4324 emit_move_insn (mem
, gotaddr
);
4325 mem
= adjust_address (m_tramp
, SImode
, 28);
4326 emit_move_insn (mem
, fnaddr
);
4330 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
4331 emit_move_insn (mem
, chain_value
);
4333 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
4334 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4335 emit_move_insn (mem
, fnaddr
);
4338 a_tramp
= XEXP (m_tramp
, 0);
4339 maybe_emit_call_builtin___clear_cache (a_tramp
,
4340 plus_constant (ptr_mode
,
4345 /* Thumb trampolines should be entered in thumb mode, so set
4346 the bottom bit of the address. */
4349 arm_trampoline_adjust_address (rtx addr
)
4351 /* For FDPIC don't fix trampoline address since it's a function
4352 descriptor and not a function address. */
4353 if (TARGET_THUMB
&& !TARGET_FDPIC
)
4354 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
4355 NULL
, 0, OPTAB_LIB_WIDEN
);
4359 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4360 includes call-clobbered registers too. If this is a leaf function
4361 we can just examine the registers used by the RTL, but otherwise we
4362 have to assume that whatever function is called might clobber
4363 anything, and so we have to save all the call-clobbered registers
4365 static inline bool reg_needs_saving_p (unsigned reg
)
4367 unsigned long func_type
= arm_current_func_type ();
4369 if (IS_INTERRUPT (func_type
))
4370 if (df_regs_ever_live_p (reg
)
4371 /* Save call-clobbered core registers. */
4372 || (! crtl
->is_leaf
&& call_used_or_fixed_reg_p (reg
) && reg
< FIRST_VFP_REGNUM
))
4377 if (!df_regs_ever_live_p (reg
)
4378 || call_used_or_fixed_reg_p (reg
))
4384 /* Return 1 if it is possible to return using a single instruction.
4385 If SIBLING is non-null, this is a test for a return before a sibling
4386 call. SIBLING is the call insn, so we can examine its register usage. */
4389 use_return_insn (int iscond
, rtx sibling
)
4392 unsigned int func_type
;
4393 unsigned long saved_int_regs
;
4394 unsigned HOST_WIDE_INT stack_adjust
;
4395 arm_stack_offsets
*offsets
;
4397 /* Never use a return instruction before reload has run. */
4398 if (!reload_completed
)
4401 /* Never use a return instruction when return address signing
4402 mechanism is enabled as it requires more than one
4404 if (arm_current_function_pac_enabled_p ())
4407 func_type
= arm_current_func_type ();
4409 /* Naked, volatile and stack alignment functions need special
4411 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4414 /* So do interrupt functions that use the frame pointer and Thumb
4415 interrupt functions. */
4416 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
4419 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4420 && !optimize_function_for_size_p (cfun
))
4423 offsets
= arm_get_frame_offsets ();
4424 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4426 /* As do variadic functions. */
4427 if (crtl
->args
.pretend_args_size
4428 || cfun
->machine
->uses_anonymous_args
4429 /* Or if the function calls __builtin_eh_return () */
4430 || crtl
->calls_eh_return
4431 /* Or if the function calls alloca */
4432 || cfun
->calls_alloca
4433 /* Or if there is a stack adjustment. However, if the stack pointer
4434 is saved on the stack, we can use a pre-incrementing stack load. */
4435 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4436 && stack_adjust
== 4))
4437 /* Or if the static chain register was saved above the frame, under the
4438 assumption that the stack pointer isn't saved on the stack. */
4439 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4440 && arm_compute_static_chain_stack_bytes() != 0))
4443 saved_int_regs
= offsets
->saved_regs_mask
;
4445 /* Unfortunately, the insn
4447 ldmib sp, {..., sp, ...}
4449 triggers a bug on most SA-110 based devices, such that the stack
4450 pointer won't be correctly restored if the instruction takes a
4451 page fault. We work around this problem by popping r3 along with
4452 the other registers, since that is never slower than executing
4453 another instruction.
4455 We test for !arm_arch5t here, because code for any architecture
4456 less than this could potentially be run on one of the buggy
4458 if (stack_adjust
== 4 && !arm_arch5t
&& TARGET_ARM
)
4460 /* Validate that r3 is a call-clobbered register (always true in
4461 the default abi) ... */
4462 if (!call_used_or_fixed_reg_p (3))
4465 /* ... that it isn't being used for a return value ... */
4466 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4469 /* ... or for a tail-call argument ... */
4472 gcc_assert (CALL_P (sibling
));
4474 if (find_regno_fusage (sibling
, USE
, 3))
4478 /* ... and that there are no call-saved registers in r0-r2
4479 (always true in the default ABI). */
4480 if (saved_int_regs
& 0x7)
4484 /* Can't be done if interworking with Thumb, and any registers have been
4486 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4489 /* On StrongARM, conditional returns are expensive if they aren't
4490 taken and multiple registers have been stacked. */
4491 if (iscond
&& arm_tune_strongarm
)
4493 /* Conditional return when just the LR is stored is a simple
4494 conditional-load instruction, that's not expensive. */
4495 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4499 && arm_pic_register
!= INVALID_REGNUM
4500 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4504 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4505 several instructions if anything needs to be popped. Armv8.1-M Mainline
4506 also needs several instructions to save and restore FP context. */
4507 if (IS_CMSE_ENTRY (func_type
) && (saved_int_regs
|| TARGET_HAVE_FPCXT_CMSE
))
4510 /* If there are saved registers but the LR isn't saved, then we need
4511 two instructions for the return. */
4512 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4515 /* Can't be done if any of the VFP regs are pushed,
4516 since this also requires an insn. */
4517 if (TARGET_VFP_BASE
)
4518 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4519 if (reg_needs_saving_p (regno
))
4522 if (TARGET_REALLY_IWMMXT
)
4523 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4524 if (reg_needs_saving_p (regno
))
4530 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4531 shrink-wrapping if possible. This is the case if we need to emit a
4532 prologue, which we can test by looking at the offsets. */
4534 use_simple_return_p (void)
4536 arm_stack_offsets
*offsets
;
4538 /* Note this function can be called before or after reload. */
4539 if (!reload_completed
)
4540 arm_compute_frame_layout ();
4542 offsets
= arm_get_frame_offsets ();
4543 return offsets
->outgoing_args
!= 0;
4546 /* Return TRUE if int I is a valid immediate ARM constant. */
4549 const_ok_for_arm (HOST_WIDE_INT i
)
4553 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4554 be all zero, or all one. */
4555 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4556 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4557 != ((~(unsigned HOST_WIDE_INT
) 0)
4558 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4561 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4563 /* Fast return for 0 and small values. We must do this for zero, since
4564 the code below can't handle that one case. */
4565 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4568 /* Get the number of trailing zeros. */
4569 lowbit
= ffs((int) i
) - 1;
4571 /* Only even shifts are allowed in ARM mode so round down to the
4572 nearest even number. */
4576 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4581 /* Allow rotated constants in ARM mode. */
4583 && ((i
& ~0xc000003f) == 0
4584 || (i
& ~0xf000000f) == 0
4585 || (i
& ~0xfc000003) == 0))
4588 else if (TARGET_THUMB2
)
4592 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4595 if (i
== v
|| i
== (v
| (v
<< 8)))
4598 /* Allow repeated pattern 0xXY00XY00. */
4604 else if (TARGET_HAVE_MOVT
)
4606 /* Thumb-1 Targets with MOVT. */
4616 /* Return true if I is a valid constant for the operation CODE. */
4618 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4620 if (const_ok_for_arm (i
))
4626 /* See if we can use movw. */
4627 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4630 /* Otherwise, try mvn. */
4631 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4634 /* See if we can use addw or subw. */
4636 && ((i
& 0xfffff000) == 0
4637 || ((-i
) & 0xfffff000) == 0))
4658 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4660 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4666 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4670 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4677 /* Return true if I is a valid di mode constant for the operation CODE. */
4679 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4681 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4682 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4683 rtx hi
= GEN_INT (hi_val
);
4684 rtx lo
= GEN_INT (lo_val
);
4694 return const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF
4695 || const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF;
4697 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4704 /* Emit a sequence of insns to handle a large constant.
4705 CODE is the code of the operation required, it can be any of SET, PLUS,
4706 IOR, AND, XOR, MINUS;
4707 MODE is the mode in which the operation is being performed;
4708 VAL is the integer to operate on;
4709 SOURCE is the other operand (a register, or a null-pointer for SET);
4710 SUBTARGETS means it is safe to create scratch registers if that will
4711 either produce a simpler sequence, or we will want to cse the values.
4712 Return value is the number of insns emitted. */
4714 /* ??? Tweak this for thumb2. */
4716 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4717 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4721 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4722 cond
= COND_EXEC_TEST (PATTERN (insn
));
4726 if (subtargets
|| code
== SET
4727 || (REG_P (target
) && REG_P (source
)
4728 && REGNO (target
) != REGNO (source
)))
4730 /* After arm_reorg has been called, we can't fix up expensive
4731 constants by pushing them into memory so we must synthesize
4732 them in-line, regardless of the cost. This is only likely to
4733 be more costly on chips that have load delay slots and we are
4734 compiling without running the scheduler (so no splitting
4735 occurred before the final instruction emission).
4737 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4739 if (!cfun
->machine
->after_arm_reorg
4741 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4743 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4748 /* Currently SET is the only monadic value for CODE, all
4749 the rest are diadic. */
4750 if (TARGET_USE_MOVT
)
4751 arm_emit_movpair (target
, GEN_INT (val
));
4753 emit_set_insn (target
, GEN_INT (val
));
4759 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4761 if (TARGET_USE_MOVT
)
4762 arm_emit_movpair (temp
, GEN_INT (val
));
4764 emit_set_insn (temp
, GEN_INT (val
));
4766 /* For MINUS, the value is subtracted from, since we never
4767 have subtraction of a constant. */
4769 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4771 emit_set_insn (target
,
4772 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4778 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4782 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4783 ARM/THUMB2 immediates, and add up to VAL.
4784 Thr function return value gives the number of insns required. */
4786 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4787 struct four_ints
*return_sequence
)
4789 int best_consecutive_zeros
= 0;
4793 struct four_ints tmp_sequence
;
4795 /* If we aren't targeting ARM, the best place to start is always at
4796 the bottom, otherwise look more closely. */
4799 for (i
= 0; i
< 32; i
+= 2)
4801 int consecutive_zeros
= 0;
4803 if (!(val
& (3 << i
)))
4805 while ((i
< 32) && !(val
& (3 << i
)))
4807 consecutive_zeros
+= 2;
4810 if (consecutive_zeros
> best_consecutive_zeros
)
4812 best_consecutive_zeros
= consecutive_zeros
;
4813 best_start
= i
- consecutive_zeros
;
4820 /* So long as it won't require any more insns to do so, it's
4821 desirable to emit a small constant (in bits 0...9) in the last
4822 insn. This way there is more chance that it can be combined with
4823 a later addressing insn to form a pre-indexed load or store
4824 operation. Consider:
4826 *((volatile int *)0xe0000100) = 1;
4827 *((volatile int *)0xe0000110) = 2;
4829 We want this to wind up as:
4833 str rB, [rA, #0x100]
4835 str rB, [rA, #0x110]
4837 rather than having to synthesize both large constants from scratch.
4839 Therefore, we calculate how many insns would be required to emit
4840 the constant starting from `best_start', and also starting from
4841 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4842 yield a shorter sequence, we may as well use zero. */
4843 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4845 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4847 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4848 if (insns2
<= insns1
)
4850 *return_sequence
= tmp_sequence
;
4858 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4860 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4861 struct four_ints
*return_sequence
, int i
)
4863 int remainder
= val
& 0xffffffff;
4866 /* Try and find a way of doing the job in either two or three
4869 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4870 location. We start at position I. This may be the MSB, or
4871 optimial_immediate_sequence may have positioned it at the largest block
4872 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4873 wrapping around to the top of the word when we drop off the bottom.
4874 In the worst case this code should produce no more than four insns.
4876 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4877 constants, shifted to any arbitrary location. We should always start
4882 unsigned int b1
, b2
, b3
, b4
;
4883 unsigned HOST_WIDE_INT result
;
4886 gcc_assert (insns
< 4);
4891 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4892 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4895 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4896 /* We can use addw/subw for the last 12 bits. */
4900 /* Use an 8-bit shifted/rotated immediate. */
4904 result
= remainder
& ((0x0ff << end
)
4905 | ((i
< end
) ? (0xff >> (32 - end
))
4912 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4913 arbitrary shifts. */
4914 i
-= TARGET_ARM
? 2 : 1;
4918 /* Next, see if we can do a better job with a thumb2 replicated
4921 We do it this way around to catch the cases like 0x01F001E0 where
4922 two 8-bit immediates would work, but a replicated constant would
4925 TODO: 16-bit constants that don't clear all the bits, but still win.
4926 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4929 b1
= (remainder
& 0xff000000) >> 24;
4930 b2
= (remainder
& 0x00ff0000) >> 16;
4931 b3
= (remainder
& 0x0000ff00) >> 8;
4932 b4
= remainder
& 0xff;
4936 /* The 8-bit immediate already found clears b1 (and maybe b2),
4937 but must leave b3 and b4 alone. */
4939 /* First try to find a 32-bit replicated constant that clears
4940 almost everything. We can assume that we can't do it in one,
4941 or else we wouldn't be here. */
4942 unsigned int tmp
= b1
& b2
& b3
& b4
;
4943 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4945 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4946 + (tmp
== b3
) + (tmp
== b4
);
4948 && (matching_bytes
>= 3
4949 || (matching_bytes
== 2
4950 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4952 /* At least 3 of the bytes match, and the fourth has at
4953 least as many bits set, or two of the bytes match
4954 and it will only require one more insn to finish. */
4962 /* Second, try to find a 16-bit replicated constant that can
4963 leave three of the bytes clear. If b2 or b4 is already
4964 zero, then we can. If the 8-bit from above would not
4965 clear b2 anyway, then we still win. */
4966 else if (b1
== b3
&& (!b2
|| !b4
4967 || (remainder
& 0x00ff0000 & ~result
)))
4969 result
= remainder
& 0xff00ff00;
4975 /* The 8-bit immediate already found clears b2 (and maybe b3)
4976 and we don't get here unless b1 is alredy clear, but it will
4977 leave b4 unchanged. */
4979 /* If we can clear b2 and b4 at once, then we win, since the
4980 8-bits couldn't possibly reach that far. */
4983 result
= remainder
& 0x00ff00ff;
4989 return_sequence
->i
[insns
++] = result
;
4990 remainder
&= ~result
;
4992 if (code
== SET
|| code
== MINUS
)
5000 /* Emit an instruction with the indicated PATTERN. If COND is
5001 non-NULL, conditionalize the execution of the instruction on COND
5005 emit_constant_insn (rtx cond
, rtx pattern
)
5008 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
5009 emit_insn (pattern
);
5012 /* As above, but extra parameter GENERATE which, if clear, suppresses
5016 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
5017 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
5018 int subtargets
, int generate
)
5022 int final_invert
= 0;
5024 int set_sign_bit_copies
= 0;
5025 int clear_sign_bit_copies
= 0;
5026 int clear_zero_bit_copies
= 0;
5027 int set_zero_bit_copies
= 0;
5028 int insns
= 0, neg_insns
, inv_insns
;
5029 unsigned HOST_WIDE_INT temp1
, temp2
;
5030 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
5031 struct four_ints
*immediates
;
5032 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
5034 /* Find out which operations are safe for a given CODE. Also do a quick
5035 check for degenerate cases; these can occur when DImode operations
5048 if (remainder
== 0xffffffff)
5051 emit_constant_insn (cond
,
5052 gen_rtx_SET (target
,
5053 GEN_INT (ARM_SIGN_EXTEND (val
))));
5059 if (reload_completed
&& rtx_equal_p (target
, source
))
5063 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5072 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
5075 if (remainder
== 0xffffffff)
5077 if (reload_completed
&& rtx_equal_p (target
, source
))
5080 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5089 if (reload_completed
&& rtx_equal_p (target
, source
))
5092 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5096 if (remainder
== 0xffffffff)
5099 emit_constant_insn (cond
,
5100 gen_rtx_SET (target
,
5101 gen_rtx_NOT (mode
, source
)));
5108 /* We treat MINUS as (val - source), since (source - val) is always
5109 passed as (source + (-val)). */
5113 emit_constant_insn (cond
,
5114 gen_rtx_SET (target
,
5115 gen_rtx_NEG (mode
, source
)));
5118 if (const_ok_for_arm (val
))
5121 emit_constant_insn (cond
,
5122 gen_rtx_SET (target
,
5123 gen_rtx_MINUS (mode
, GEN_INT (val
),
5134 /* If we can do it in one insn get out quickly. */
5135 if (const_ok_for_op (val
, code
))
5138 emit_constant_insn (cond
,
5139 gen_rtx_SET (target
,
5141 ? gen_rtx_fmt_ee (code
, mode
, source
,
5147 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5149 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
5150 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
5154 if (mode
== SImode
&& i
== 16)
5155 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5157 emit_constant_insn (cond
,
5158 gen_zero_extendhisi2
5159 (target
, gen_lowpart (HImode
, source
)));
5161 /* Extz only supports SImode, but we can coerce the operands
5163 emit_constant_insn (cond
,
5164 gen_extzv_t2 (gen_lowpart (SImode
, target
),
5165 gen_lowpart (SImode
, source
),
5166 GEN_INT (i
), const0_rtx
));
5172 /* Calculate a few attributes that may be useful for specific
5174 /* Count number of leading zeros. */
5175 for (i
= 31; i
>= 0; i
--)
5177 if ((remainder
& (1 << i
)) == 0)
5178 clear_sign_bit_copies
++;
5183 /* Count number of leading 1's. */
5184 for (i
= 31; i
>= 0; i
--)
5186 if ((remainder
& (1 << i
)) != 0)
5187 set_sign_bit_copies
++;
5192 /* Count number of trailing zero's. */
5193 for (i
= 0; i
<= 31; i
++)
5195 if ((remainder
& (1 << i
)) == 0)
5196 clear_zero_bit_copies
++;
5201 /* Count number of trailing 1's. */
5202 for (i
= 0; i
<= 31; i
++)
5204 if ((remainder
& (1 << i
)) != 0)
5205 set_zero_bit_copies
++;
5213 /* See if we can do this by sign_extending a constant that is known
5214 to be negative. This is a good, way of doing it, since the shift
5215 may well merge into a subsequent insn. */
5216 if (set_sign_bit_copies
> 1)
5218 if (const_ok_for_arm
5219 (temp1
= ARM_SIGN_EXTEND (remainder
5220 << (set_sign_bit_copies
- 1))))
5224 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5225 emit_constant_insn (cond
,
5226 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5227 emit_constant_insn (cond
,
5228 gen_ashrsi3 (target
, new_src
,
5229 GEN_INT (set_sign_bit_copies
- 1)));
5233 /* For an inverted constant, we will need to set the low bits,
5234 these will be shifted out of harm's way. */
5235 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
5236 if (const_ok_for_arm (~temp1
))
5240 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5241 emit_constant_insn (cond
,
5242 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5243 emit_constant_insn (cond
,
5244 gen_ashrsi3 (target
, new_src
,
5245 GEN_INT (set_sign_bit_copies
- 1)));
5251 /* See if we can calculate the value as the difference between two
5252 valid immediates. */
5253 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
5255 int topshift
= clear_sign_bit_copies
& ~1;
5257 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
5258 & (0xff000000 >> topshift
));
5260 /* If temp1 is zero, then that means the 9 most significant
5261 bits of remainder were 1 and we've caused it to overflow.
5262 When topshift is 0 we don't need to do anything since we
5263 can borrow from 'bit 32'. */
5264 if (temp1
== 0 && topshift
!= 0)
5265 temp1
= 0x80000000 >> (topshift
- 1);
5267 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
5269 if (const_ok_for_arm (temp2
))
5273 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5274 emit_constant_insn (cond
,
5275 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5276 emit_constant_insn (cond
,
5277 gen_addsi3 (target
, new_src
,
5285 /* See if we can generate this by setting the bottom (or the top)
5286 16 bits, and then shifting these into the other half of the
5287 word. We only look for the simplest cases, to do more would cost
5288 too much. Be careful, however, not to generate this when the
5289 alternative would take fewer insns. */
5290 if (val
& 0xffff0000)
5292 temp1
= remainder
& 0xffff0000;
5293 temp2
= remainder
& 0x0000ffff;
5295 /* Overlaps outside this range are best done using other methods. */
5296 for (i
= 9; i
< 24; i
++)
5298 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
5299 && !const_ok_for_arm (temp2
))
5301 rtx new_src
= (subtargets
5302 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5304 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
5305 source
, subtargets
, generate
);
5313 gen_rtx_ASHIFT (mode
, source
,
5320 /* Don't duplicate cases already considered. */
5321 for (i
= 17; i
< 24; i
++)
5323 if (((temp1
| (temp1
>> i
)) == remainder
)
5324 && !const_ok_for_arm (temp1
))
5326 rtx new_src
= (subtargets
5327 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5329 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
5330 source
, subtargets
, generate
);
5335 gen_rtx_SET (target
,
5338 gen_rtx_LSHIFTRT (mode
, source
,
5349 /* If we have IOR or XOR, and the constant can be loaded in a
5350 single instruction, and we can find a temporary to put it in,
5351 then this can be done in two instructions instead of 3-4. */
5353 /* TARGET can't be NULL if SUBTARGETS is 0 */
5354 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
5356 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
5360 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5362 emit_constant_insn (cond
,
5363 gen_rtx_SET (sub
, GEN_INT (val
)));
5364 emit_constant_insn (cond
,
5365 gen_rtx_SET (target
,
5366 gen_rtx_fmt_ee (code
, mode
,
5377 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5378 and the remainder 0s for e.g. 0xfff00000)
5379 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5381 This can be done in 2 instructions by using shifts with mov or mvn.
5386 mvn r0, r0, lsr #12 */
5387 if (set_sign_bit_copies
> 8
5388 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
5392 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5393 rtx shift
= GEN_INT (set_sign_bit_copies
);
5399 gen_rtx_ASHIFT (mode
,
5404 gen_rtx_SET (target
,
5406 gen_rtx_LSHIFTRT (mode
, sub
,
5413 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5415 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5417 For eg. r0 = r0 | 0xfff
5422 if (set_zero_bit_copies
> 8
5423 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5427 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5428 rtx shift
= GEN_INT (set_zero_bit_copies
);
5434 gen_rtx_LSHIFTRT (mode
,
5439 gen_rtx_SET (target
,
5441 gen_rtx_ASHIFT (mode
, sub
,
5447 /* This will never be reached for Thumb2 because orn is a valid
5448 instruction. This is for Thumb1 and the ARM 32 bit cases.
5450 x = y | constant (such that ~constant is a valid constant)
5452 x = ~(~y & ~constant).
5454 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5458 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5459 emit_constant_insn (cond
,
5461 gen_rtx_NOT (mode
, source
)));
5464 sub
= gen_reg_rtx (mode
);
5465 emit_constant_insn (cond
,
5467 gen_rtx_AND (mode
, source
,
5469 emit_constant_insn (cond
,
5470 gen_rtx_SET (target
,
5471 gen_rtx_NOT (mode
, sub
)));
5478 /* See if two shifts will do 2 or more insn's worth of work. */
5479 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5481 HOST_WIDE_INT shift_mask
= ((0xffffffff
5482 << (32 - clear_sign_bit_copies
))
5485 if ((remainder
| shift_mask
) != 0xffffffff)
5487 HOST_WIDE_INT new_val
5488 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5492 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5493 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5494 new_src
, source
, subtargets
, 1);
5499 rtx targ
= subtargets
? NULL_RTX
: target
;
5500 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5501 targ
, source
, subtargets
, 0);
5507 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5508 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5510 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5511 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5517 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5519 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5521 if ((remainder
| shift_mask
) != 0xffffffff)
5523 HOST_WIDE_INT new_val
5524 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5527 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5529 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5530 new_src
, source
, subtargets
, 1);
5535 rtx targ
= subtargets
? NULL_RTX
: target
;
5537 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5538 targ
, source
, subtargets
, 0);
5544 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5545 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5547 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5548 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5560 /* Calculate what the instruction sequences would be if we generated it
5561 normally, negated, or inverted. */
5563 /* AND cannot be split into multiple insns, so invert and use BIC. */
5566 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5569 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5574 if (can_invert
|| final_invert
)
5575 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5580 immediates
= &pos_immediates
;
5582 /* Is the negated immediate sequence more efficient? */
5583 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5586 immediates
= &neg_immediates
;
5591 /* Is the inverted immediate sequence more efficient?
5592 We must allow for an extra NOT instruction for XOR operations, although
5593 there is some chance that the final 'mvn' will get optimized later. */
5594 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5597 immediates
= &inv_immediates
;
5605 /* Now output the chosen sequence as instructions. */
5608 for (i
= 0; i
< insns
; i
++)
5610 rtx new_src
, temp1_rtx
;
5612 temp1
= immediates
->i
[i
];
5614 if (code
== SET
|| code
== MINUS
)
5615 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5616 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5617 new_src
= gen_reg_rtx (mode
);
5623 else if (can_negate
)
5626 temp1
= trunc_int_for_mode (temp1
, mode
);
5627 temp1_rtx
= GEN_INT (temp1
);
5631 else if (code
== MINUS
)
5632 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5634 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5636 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5641 can_negate
= can_invert
;
5645 else if (code
== MINUS
)
5653 emit_constant_insn (cond
, gen_rtx_SET (target
,
5654 gen_rtx_NOT (mode
, source
)));
5661 /* Return TRUE if op is a constant where both the low and top words are
5662 suitable for RSB/RSC instructions. This is never true for Thumb, since
5663 we do not have RSC in that case. */
5665 arm_const_double_prefer_rsbs_rsc (rtx op
)
5667 /* Thumb lacks RSC, so we never prefer that sequence. */
5668 if (TARGET_THUMB
|| !CONST_INT_P (op
))
5670 HOST_WIDE_INT hi
, lo
;
5671 lo
= UINTVAL (op
) & 0xffffffffULL
;
5672 hi
= UINTVAL (op
) >> 32;
5673 return const_ok_for_arm (lo
) && const_ok_for_arm (hi
);
5676 /* Canonicalize a comparison so that we are more likely to recognize it.
5677 This can be done for a few constant compares, where we can make the
5678 immediate value easier to load. */
5681 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5682 bool op0_preserve_value
)
5685 unsigned HOST_WIDE_INT i
, maxval
;
5687 mode
= GET_MODE (*op0
);
5688 if (mode
== VOIDmode
)
5689 mode
= GET_MODE (*op1
);
5691 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5693 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5694 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5695 either reversed or (for constant OP1) adjusted to GE/LT.
5696 Similarly for GTU/LEU in Thumb mode. */
5700 if (*code
== GT
|| *code
== LE
5701 || *code
== GTU
|| *code
== LEU
)
5703 /* Missing comparison. First try to use an available
5705 if (CONST_INT_P (*op1
))
5714 /* Try to convert to GE/LT, unless that would be more
5716 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5717 && arm_const_double_prefer_rsbs_rsc (*op1
))
5719 *op1
= GEN_INT (i
+ 1);
5720 *code
= *code
== GT
? GE
: LT
;
5724 /* GT maxval is always false, LE maxval is always true.
5725 We can't fold that away here as we must make a
5726 comparison, but we can fold them to comparisons
5727 with the same result that can be handled:
5728 op0 GT maxval -> op0 LT minval
5729 op0 LE maxval -> op0 GE minval
5730 where minval = (-maxval - 1). */
5731 *op1
= GEN_INT (-maxval
- 1);
5732 *code
= *code
== GT
? LT
: GE
;
5738 if (i
!= ~((unsigned HOST_WIDE_INT
) 0))
5740 /* Try to convert to GEU/LTU, unless that would
5741 be more expensive. */
5742 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5743 && arm_const_double_prefer_rsbs_rsc (*op1
))
5745 *op1
= GEN_INT (i
+ 1);
5746 *code
= *code
== GTU
? GEU
: LTU
;
5750 /* GTU ~0 is always false, LEU ~0 is always true.
5751 We can't fold that away here as we must make a
5752 comparison, but we can fold them to comparisons
5753 with the same result that can be handled:
5754 op0 GTU ~0 -> op0 LTU 0
5755 op0 LEU ~0 -> op0 GEU 0. */
5757 *code
= *code
== GTU
? LTU
: GEU
;
5766 if (!op0_preserve_value
)
5768 std::swap (*op0
, *op1
);
5769 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5775 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5776 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5777 to facilitate possible combining with a cmp into 'ands'. */
5779 && GET_CODE (*op0
) == ZERO_EXTEND
5780 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5781 && GET_MODE (XEXP (*op0
, 0)) == QImode
5782 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5783 && subreg_lowpart_p (XEXP (*op0
, 0))
5784 && *op1
== const0_rtx
)
5785 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5788 /* Comparisons smaller than DImode. Only adjust comparisons against
5789 an out-of-range constant. */
5790 if (!CONST_INT_P (*op1
)
5791 || const_ok_for_arm (INTVAL (*op1
))
5792 || const_ok_for_arm (- INTVAL (*op1
)))
5806 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5808 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5809 *code
= *code
== GT
? GE
: LT
;
5817 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5819 *op1
= GEN_INT (i
- 1);
5820 *code
= *code
== GE
? GT
: LE
;
5827 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5828 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5830 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5831 *code
= *code
== GTU
? GEU
: LTU
;
5839 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5841 *op1
= GEN_INT (i
- 1);
5842 *code
= *code
== GEU
? GTU
: LEU
;
5853 /* Define how to find the value returned by a function. */
5856 arm_function_value(const_tree type
, const_tree func
,
5857 bool outgoing ATTRIBUTE_UNUSED
)
5860 int unsignedp ATTRIBUTE_UNUSED
;
5861 rtx r ATTRIBUTE_UNUSED
;
5863 mode
= TYPE_MODE (type
);
5865 if (TARGET_AAPCS_BASED
)
5866 return aapcs_allocate_return_reg (mode
, type
, func
);
5868 /* Promote integer types. */
5869 if (INTEGRAL_TYPE_P (type
))
5870 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5872 /* Promotes small structs returned in a register to full-word size
5873 for big-endian AAPCS. */
5874 if (arm_return_in_msb (type
))
5876 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5877 if (size
% UNITS_PER_WORD
!= 0)
5879 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5880 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5884 return arm_libcall_value_1 (mode
);
5887 /* libcall hashtable helpers. */
5889 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5891 static inline hashval_t
hash (const rtx_def
*);
5892 static inline bool equal (const rtx_def
*, const rtx_def
*);
5893 static inline void remove (rtx_def
*);
5897 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5899 return rtx_equal_p (p1
, p2
);
5903 libcall_hasher::hash (const rtx_def
*p1
)
5905 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5908 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5911 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5913 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5917 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5919 static bool init_done
= false;
5920 static libcall_table_type
*libcall_htab
= NULL
;
5926 libcall_htab
= new libcall_table_type (31);
5927 add_libcall (libcall_htab
,
5928 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5929 add_libcall (libcall_htab
,
5930 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5931 add_libcall (libcall_htab
,
5932 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5933 add_libcall (libcall_htab
,
5934 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5936 add_libcall (libcall_htab
,
5937 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5938 add_libcall (libcall_htab
,
5939 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5940 add_libcall (libcall_htab
,
5941 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5942 add_libcall (libcall_htab
,
5943 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5945 add_libcall (libcall_htab
,
5946 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5947 add_libcall (libcall_htab
,
5948 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5949 add_libcall (libcall_htab
,
5950 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5951 add_libcall (libcall_htab
,
5952 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5953 add_libcall (libcall_htab
,
5954 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5955 add_libcall (libcall_htab
,
5956 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5957 add_libcall (libcall_htab
,
5958 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5959 add_libcall (libcall_htab
,
5960 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5961 add_libcall (libcall_htab
,
5962 convert_optab_libfunc (sfix_optab
, SImode
, SFmode
));
5963 add_libcall (libcall_htab
,
5964 convert_optab_libfunc (ufix_optab
, SImode
, SFmode
));
5966 /* Values from double-precision helper functions are returned in core
5967 registers if the selected core only supports single-precision
5968 arithmetic, even if we are using the hard-float ABI. The same is
5969 true for single-precision helpers except in case of MVE, because in
5970 MVE we will be using the hard-float ABI on a CPU which doesn't support
5971 single-precision operations in hardware. In MVE the following check
5972 enables use of emulation for the single-precision arithmetic
5974 if (TARGET_HAVE_MVE
)
5976 add_libcall (libcall_htab
, optab_libfunc (add_optab
, SFmode
));
5977 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, SFmode
));
5978 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, SFmode
));
5979 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, SFmode
));
5980 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, SFmode
));
5981 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, SFmode
));
5982 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, SFmode
));
5983 add_libcall (libcall_htab
, optab_libfunc (le_optab
, SFmode
));
5984 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, SFmode
));
5985 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, SFmode
));
5986 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, SFmode
));
5988 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5989 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5990 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5991 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5992 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5993 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5994 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5995 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5996 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5997 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5998 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5999 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
6001 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
6003 add_libcall (libcall_htab
,
6004 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
6007 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
6011 arm_libcall_value_1 (machine_mode mode
)
6013 if (TARGET_AAPCS_BASED
)
6014 return aapcs_libcall_value (mode
);
6015 else if (TARGET_IWMMXT_ABI
6016 && arm_vector_mode_supported_p (mode
))
6017 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
6019 return gen_rtx_REG (mode
, ARG_REGISTER (1));
6022 /* Define how to find the value returned by a library function
6023 assuming the value has mode MODE. */
6026 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
6028 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
6029 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6031 /* The following libcalls return their result in integer registers,
6032 even though they return a floating point value. */
6033 if (arm_libcall_uses_aapcs_base (libcall
))
6034 return gen_rtx_REG (mode
, ARG_REGISTER(1));
6038 return arm_libcall_value_1 (mode
);
6041 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6044 arm_function_value_regno_p (const unsigned int regno
)
6046 if (regno
== ARG_REGISTER (1)
6048 && TARGET_AAPCS_BASED
6049 && TARGET_HARD_FLOAT
6050 && regno
== FIRST_VFP_REGNUM
)
6051 || (TARGET_IWMMXT_ABI
6052 && regno
== FIRST_IWMMXT_REGNUM
))
6058 /* Determine the amount of memory needed to store the possible return
6059 registers of an untyped call. */
6061 arm_apply_result_size (void)
6067 if (TARGET_HARD_FLOAT_ABI
)
6069 if (TARGET_IWMMXT_ABI
)
6076 /* Decide whether TYPE should be returned in memory (true)
6077 or in a register (false). FNTYPE is the type of the function making
6080 arm_return_in_memory (const_tree type
, const_tree fntype
)
6084 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
6086 if (TARGET_AAPCS_BASED
)
6088 /* Simple, non-aggregate types (ie not including vectors and
6089 complex) are always returned in a register (or registers).
6090 We don't care about which register here, so we can short-cut
6091 some of the detail. */
6092 if (!AGGREGATE_TYPE_P (type
)
6093 && TREE_CODE (type
) != VECTOR_TYPE
6094 && TREE_CODE (type
) != COMPLEX_TYPE
)
6097 /* Any return value that is no larger than one word can be
6099 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
6102 /* Check any available co-processors to see if they accept the
6103 type as a register candidate (VFP, for example, can return
6104 some aggregates in consecutive registers). These aren't
6105 available if the call is variadic. */
6106 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
6109 /* Vector values should be returned using ARM registers, not
6110 memory (unless they're over 16 bytes, which will break since
6111 we only have four call-clobbered registers to play with). */
6112 if (TREE_CODE (type
) == VECTOR_TYPE
)
6113 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6115 /* The rest go in memory. */
6119 if (TREE_CODE (type
) == VECTOR_TYPE
)
6120 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6122 if (!AGGREGATE_TYPE_P (type
) &&
6123 (TREE_CODE (type
) != VECTOR_TYPE
))
6124 /* All simple types are returned in registers. */
6127 if (arm_abi
!= ARM_ABI_APCS
)
6129 /* ATPCS and later return aggregate types in memory only if they are
6130 larger than a word (or are variable size). */
6131 return (size
< 0 || size
> UNITS_PER_WORD
);
6134 /* For the arm-wince targets we choose to be compatible with Microsoft's
6135 ARM and Thumb compilers, which always return aggregates in memory. */
6137 /* All structures/unions bigger than one word are returned in memory.
6138 Also catch the case where int_size_in_bytes returns -1. In this case
6139 the aggregate is either huge or of variable size, and in either case
6140 we will want to return it via memory and not in a register. */
6141 if (size
< 0 || size
> UNITS_PER_WORD
)
6144 if (TREE_CODE (type
) == RECORD_TYPE
)
6148 /* For a struct the APCS says that we only return in a register
6149 if the type is 'integer like' and every addressable element
6150 has an offset of zero. For practical purposes this means
6151 that the structure can have at most one non bit-field element
6152 and that this element must be the first one in the structure. */
6154 /* Find the first field, ignoring non FIELD_DECL things which will
6155 have been created by C++. */
6156 /* NOTE: This code is deprecated and has not been updated to handle
6157 DECL_FIELD_ABI_IGNORED. */
6158 for (field
= TYPE_FIELDS (type
);
6159 field
&& TREE_CODE (field
) != FIELD_DECL
;
6160 field
= DECL_CHAIN (field
))
6164 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6166 /* Check that the first field is valid for returning in a register. */
6168 /* ... Floats are not allowed */
6169 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6172 /* ... Aggregates that are not themselves valid for returning in
6173 a register are not allowed. */
6174 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6177 /* Now check the remaining fields, if any. Only bitfields are allowed,
6178 since they are not addressable. */
6179 for (field
= DECL_CHAIN (field
);
6181 field
= DECL_CHAIN (field
))
6183 if (TREE_CODE (field
) != FIELD_DECL
)
6186 if (!DECL_BIT_FIELD_TYPE (field
))
6193 if (TREE_CODE (type
) == UNION_TYPE
)
6197 /* Unions can be returned in registers if every element is
6198 integral, or can be returned in an integer register. */
6199 for (field
= TYPE_FIELDS (type
);
6201 field
= DECL_CHAIN (field
))
6203 if (TREE_CODE (field
) != FIELD_DECL
)
6206 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6209 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6215 #endif /* not ARM_WINCE */
6217 /* Return all other types in memory. */
6221 const struct pcs_attribute_arg
6225 } pcs_attribute_args
[] =
6227 {"aapcs", ARM_PCS_AAPCS
},
6228 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
6230 /* We could recognize these, but changes would be needed elsewhere
6231 * to implement them. */
6232 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
6233 {"atpcs", ARM_PCS_ATPCS
},
6234 {"apcs", ARM_PCS_APCS
},
6236 {NULL
, ARM_PCS_UNKNOWN
}
6240 arm_pcs_from_attribute (tree attr
)
6242 const struct pcs_attribute_arg
*ptr
;
6245 /* Get the value of the argument. */
6246 if (TREE_VALUE (attr
) == NULL_TREE
6247 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
6248 return ARM_PCS_UNKNOWN
;
6250 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
6252 /* Check it against the list of known arguments. */
6253 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
6254 if (streq (arg
, ptr
->arg
))
6257 /* An unrecognized interrupt type. */
6258 return ARM_PCS_UNKNOWN
;
6261 /* Get the PCS variant to use for this call. TYPE is the function's type
6262 specification, DECL is the specific declartion. DECL may be null if
6263 the call could be indirect or if this is a library call. */
6265 arm_get_pcs_model (const_tree type
, const_tree decl ATTRIBUTE_UNUSED
)
6267 bool user_convention
= false;
6268 enum arm_pcs user_pcs
= arm_pcs_default
;
6273 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
6276 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
6277 user_convention
= true;
6280 if (TARGET_AAPCS_BASED
)
6282 /* Detect varargs functions. These always use the base rules
6283 (no argument is ever a candidate for a co-processor
6285 bool base_rules
= stdarg_p (type
);
6287 if (user_convention
)
6289 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
6290 sorry ("non-AAPCS derived PCS variant");
6291 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
6292 error ("variadic functions must use the base AAPCS variant");
6296 return ARM_PCS_AAPCS
;
6297 else if (user_convention
)
6300 /* Unfortunately, this is not safe and can lead to wrong code
6301 being generated (PR96882). Not all calls into the back-end
6302 pass the DECL, so it is unsafe to make any PCS-changing
6303 decisions based on it. In particular the RETURN_IN_MEMORY
6304 hook is only ever passed a TYPE. This needs revisiting to
6305 see if there are any partial improvements that can be
6307 else if (decl
&& flag_unit_at_a_time
)
6309 /* Local functions never leak outside this compilation unit,
6310 so we are free to use whatever conventions are
6312 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6313 cgraph_node
*local_info_node
6314 = cgraph_node::local_info_node (CONST_CAST_TREE (decl
));
6315 if (local_info_node
&& local_info_node
->local
)
6316 return ARM_PCS_AAPCS_LOCAL
;
6320 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
6321 sorry ("PCS variant");
6323 /* For everything else we use the target's default. */
6324 return arm_pcs_default
;
6329 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6330 const_tree fntype ATTRIBUTE_UNUSED
,
6331 rtx libcall ATTRIBUTE_UNUSED
,
6332 const_tree fndecl ATTRIBUTE_UNUSED
)
6334 /* Record the unallocated VFP registers. */
6335 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
6336 pcum
->aapcs_vfp_reg_alloc
= 0;
6339 /* Bitmasks that indicate whether earlier versions of GCC would have
6340 taken a different path through the ABI logic. This should result in
6341 a -Wpsabi warning if the earlier path led to a different ABI decision.
6343 WARN_PSABI_EMPTY_CXX17_BASE
6344 Indicates that the type includes an artificial empty C++17 base field
6345 that, prior to GCC 10.1, would prevent the type from being treated as
6346 a HFA or HVA. See PR94711 for details.
6348 WARN_PSABI_NO_UNIQUE_ADDRESS
6349 Indicates that the type includes an empty [[no_unique_address]] field
6350 that, prior to GCC 10.1, would prevent the type from being treated as
6352 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE
= 1U << 0;
6353 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS
= 1U << 1;
6354 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD
= 1U << 2;
6356 /* Walk down the type tree of TYPE counting consecutive base elements.
6357 If *MODEP is VOIDmode, then set it to the first valid floating point
6358 type. If a non-floating point type is found, or if a floating point
6359 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6360 otherwise return the count in the sub-tree.
6362 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6363 function has changed its behavior relative to earlier versions of GCC.
6364 Normally the argument should be nonnull and point to a zero-initialized
6365 variable. The function then records whether the ABI decision might
6366 be affected by a known fix to the ABI logic, setting the associated
6367 WARN_PSABI_* bits if so.
6369 When the argument is instead a null pointer, the function tries to
6370 simulate the behavior of GCC before all such ABI fixes were made.
6371 This is useful to check whether the function returns something
6372 different after the ABI fixes. */
6374 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
,
6375 unsigned int *warn_psabi_flags
)
6380 switch (TREE_CODE (type
))
6383 mode
= TYPE_MODE (type
);
6384 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
&& mode
!= BFmode
)
6387 if (*modep
== VOIDmode
)
6396 mode
= TYPE_MODE (TREE_TYPE (type
));
6397 if (mode
!= DFmode
&& mode
!= SFmode
)
6400 if (*modep
== VOIDmode
)
6409 /* Use V2SImode and V4SImode as representatives of all 64-bit
6410 and 128-bit vector types, whether or not those modes are
6411 supported with the present options. */
6412 size
= int_size_in_bytes (type
);
6425 if (*modep
== VOIDmode
)
6428 /* Vector modes are considered to be opaque: two vectors are
6429 equivalent for the purposes of being homogeneous aggregates
6430 if they are the same size. */
6439 tree index
= TYPE_DOMAIN (type
);
6441 /* Can't handle incomplete types nor sizes that are not
6443 if (!COMPLETE_TYPE_P (type
)
6444 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6447 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
,
6451 || !TYPE_MAX_VALUE (index
)
6452 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6453 || !TYPE_MIN_VALUE (index
)
6454 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6458 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6459 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6461 /* There must be no padding. */
6462 if (wi::to_wide (TYPE_SIZE (type
))
6463 != count
* GET_MODE_BITSIZE (*modep
))
6475 /* Can't handle incomplete types nor sizes that are not
6477 if (!COMPLETE_TYPE_P (type
)
6478 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6481 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6483 if (TREE_CODE (field
) != FIELD_DECL
)
6486 if (DECL_FIELD_ABI_IGNORED (field
))
6488 /* See whether this is something that earlier versions of
6489 GCC failed to ignore. */
6491 if (lookup_attribute ("no_unique_address",
6492 DECL_ATTRIBUTES (field
)))
6493 flag
= WARN_PSABI_NO_UNIQUE_ADDRESS
;
6494 else if (cxx17_empty_base_field_p (field
))
6495 flag
= WARN_PSABI_EMPTY_CXX17_BASE
;
6497 /* No compatibility problem. */
6500 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6501 if (warn_psabi_flags
)
6503 *warn_psabi_flags
|= flag
;
6507 /* A zero-width bitfield may affect layout in some
6508 circumstances, but adds no members. The determination
6509 of whether or not a type is an HFA is performed after
6510 layout is complete, so if the type still looks like an
6511 HFA afterwards, it is still classed as one. This is
6512 potentially an ABI break for the hard-float ABI. */
6513 else if (DECL_BIT_FIELD (field
)
6514 && integer_zerop (DECL_SIZE (field
)))
6516 /* Prior to GCC-12 these fields were striped early,
6517 hiding them from the back-end entirely and
6518 resulting in the correct behaviour for argument
6519 passing. Simulate that old behaviour without
6520 generating a warning. */
6521 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
6523 if (warn_psabi_flags
)
6525 *warn_psabi_flags
|= WARN_PSABI_ZERO_WIDTH_BITFIELD
;
6530 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6537 /* There must be no padding. */
6538 if (wi::to_wide (TYPE_SIZE (type
))
6539 != count
* GET_MODE_BITSIZE (*modep
))
6546 case QUAL_UNION_TYPE
:
6548 /* These aren't very interesting except in a degenerate case. */
6553 /* Can't handle incomplete types nor sizes that are not
6555 if (!COMPLETE_TYPE_P (type
)
6556 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6559 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6561 if (TREE_CODE (field
) != FIELD_DECL
)
6564 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6568 count
= count
> sub_count
? count
: sub_count
;
6571 /* There must be no padding. */
6572 if (wi::to_wide (TYPE_SIZE (type
))
6573 != count
* GET_MODE_BITSIZE (*modep
))
6586 /* Return true if PCS_VARIANT should use VFP registers. */
6588 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6590 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6592 static bool seen_thumb1_vfp
= false;
6594 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6596 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6597 /* sorry() is not immediately fatal, so only display this once. */
6598 seen_thumb1_vfp
= true;
6604 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6607 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6608 (TARGET_VFP_DOUBLE
|| !is_double
));
6611 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6612 suitable for passing or returning in VFP registers for the PCS
6613 variant selected. If it is, then *BASE_MODE is updated to contain
6614 a machine mode describing each element of the argument's type and
6615 *COUNT to hold the number of such elements. */
6617 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6618 machine_mode mode
, const_tree type
,
6619 machine_mode
*base_mode
, int *count
)
6621 machine_mode new_mode
= VOIDmode
;
6623 /* If we have the type information, prefer that to working things
6624 out from the mode. */
6627 unsigned int warn_psabi_flags
= 0;
6628 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
,
6630 if (ag_count
> 0 && ag_count
<= 4)
6632 static unsigned last_reported_type_uid
;
6633 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (type
));
6637 && uid
!= last_reported_type_uid
6638 && ((alt
= aapcs_vfp_sub_candidate (type
, &new_mode
, NULL
))
6642 = CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
6644 = CHANGES_ROOT_URL
"gcc-12/changes.html#zero_width_bitfields";
6645 gcc_assert (alt
== -1);
6646 last_reported_type_uid
= uid
;
6647 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6649 if (warn_psabi_flags
& WARN_PSABI_NO_UNIQUE_ADDRESS
)
6650 inform (input_location
, "parameter passing for argument of "
6651 "type %qT with %<[[no_unique_address]]%> members "
6652 "changed %{in GCC 10.1%}",
6653 TYPE_MAIN_VARIANT (type
), url10
);
6654 else if (warn_psabi_flags
& WARN_PSABI_EMPTY_CXX17_BASE
)
6655 inform (input_location
, "parameter passing for argument of "
6656 "type %qT when C++17 is enabled changed to match "
6657 "C++14 %{in GCC 10.1%}",
6658 TYPE_MAIN_VARIANT (type
), url10
);
6659 else if (warn_psabi_flags
& WARN_PSABI_ZERO_WIDTH_BITFIELD
)
6660 inform (input_location
, "parameter passing for argument of "
6661 "type %qT changed %{in GCC 12.1%}",
6662 TYPE_MAIN_VARIANT (type
), url12
);
6669 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6670 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6671 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6676 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6679 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6685 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6688 *base_mode
= new_mode
;
6690 if (TARGET_GENERAL_REGS_ONLY
)
6691 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6698 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6699 machine_mode mode
, const_tree type
)
6701 int count ATTRIBUTE_UNUSED
;
6702 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6704 if (!use_vfp_abi (pcs_variant
, false))
6706 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6711 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6714 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6717 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6718 &pcum
->aapcs_vfp_rmode
,
6719 &pcum
->aapcs_vfp_rcount
);
6722 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6723 for the behaviour of this function. */
6726 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6727 const_tree type ATTRIBUTE_UNUSED
)
6730 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6731 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6732 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6735 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6736 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6738 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6740 || (mode
== TImode
&& ! (TARGET_NEON
|| TARGET_HAVE_MVE
))
6741 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6744 int rcount
= pcum
->aapcs_vfp_rcount
;
6746 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6748 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6750 /* Avoid using unsupported vector modes. */
6751 if (rmode
== V2SImode
)
6753 else if (rmode
== V4SImode
)
6760 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6761 for (i
= 0; i
< rcount
; i
++)
6763 rtx tmp
= gen_rtx_REG (rmode
,
6764 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6765 tmp
= gen_rtx_EXPR_LIST
6767 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6768 XVECEXP (par
, 0, i
) = tmp
;
6771 pcum
->aapcs_reg
= par
;
6774 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6780 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6781 comment there for the behaviour of this function. */
6784 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6786 const_tree type ATTRIBUTE_UNUSED
)
6788 if (!use_vfp_abi (pcs_variant
, false))
6792 || (GET_MODE_CLASS (mode
) == MODE_INT
6793 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6794 && !(TARGET_NEON
|| TARGET_HAVE_MVE
)))
6797 machine_mode ag_mode
;
6802 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6805 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6807 if (ag_mode
== V2SImode
)
6809 else if (ag_mode
== V4SImode
)
6815 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6816 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6817 for (i
= 0; i
< count
; i
++)
6819 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6820 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6821 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6822 XVECEXP (par
, 0, i
) = tmp
;
6828 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6832 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6833 machine_mode mode ATTRIBUTE_UNUSED
,
6834 const_tree type ATTRIBUTE_UNUSED
)
6836 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6837 pcum
->aapcs_vfp_reg_alloc
= 0;
6841 #define AAPCS_CP(X) \
6843 aapcs_ ## X ## _cum_init, \
6844 aapcs_ ## X ## _is_call_candidate, \
6845 aapcs_ ## X ## _allocate, \
6846 aapcs_ ## X ## _is_return_candidate, \
6847 aapcs_ ## X ## _allocate_return_reg, \
6848 aapcs_ ## X ## _advance \
6851 /* Table of co-processors that can be used to pass arguments in
6852 registers. Idealy no arugment should be a candidate for more than
6853 one co-processor table entry, but the table is processed in order
6854 and stops after the first match. If that entry then fails to put
6855 the argument into a co-processor register, the argument will go on
6859 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6860 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6862 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6863 BLKmode) is a candidate for this co-processor's registers; this
6864 function should ignore any position-dependent state in
6865 CUMULATIVE_ARGS and only use call-type dependent information. */
6866 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6868 /* Return true if the argument does get a co-processor register; it
6869 should set aapcs_reg to an RTX of the register allocated as is
6870 required for a return from FUNCTION_ARG. */
6871 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6873 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6874 be returned in this co-processor's registers. */
6875 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6877 /* Allocate and return an RTX element to hold the return type of a call. This
6878 routine must not fail and will only be called if is_return_candidate
6879 returned true with the same parameters. */
6880 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6882 /* Finish processing this argument and prepare to start processing
6884 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6885 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6893 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6898 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6899 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6906 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6908 /* We aren't passed a decl, so we can't check that a call is local.
6909 However, it isn't clear that that would be a win anyway, since it
6910 might limit some tail-calling opportunities. */
6911 enum arm_pcs pcs_variant
;
6915 const_tree fndecl
= NULL_TREE
;
6917 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6920 fntype
= TREE_TYPE (fntype
);
6923 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6926 pcs_variant
= arm_pcs_default
;
6928 if (pcs_variant
!= ARM_PCS_AAPCS
)
6932 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6933 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6942 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6945 /* We aren't passed a decl, so we can't check that a call is local.
6946 However, it isn't clear that that would be a win anyway, since it
6947 might limit some tail-calling opportunities. */
6948 enum arm_pcs pcs_variant
;
6949 int unsignedp ATTRIBUTE_UNUSED
;
6953 const_tree fndecl
= NULL_TREE
;
6955 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6958 fntype
= TREE_TYPE (fntype
);
6961 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6964 pcs_variant
= arm_pcs_default
;
6966 /* Promote integer types. */
6967 if (type
&& INTEGRAL_TYPE_P (type
))
6968 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6970 if (pcs_variant
!= ARM_PCS_AAPCS
)
6974 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6975 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6977 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6981 /* Promotes small structs returned in a register to full-word size
6982 for big-endian AAPCS. */
6983 if (type
&& arm_return_in_msb (type
))
6985 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6986 if (size
% UNITS_PER_WORD
!= 0)
6988 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6989 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6993 return gen_rtx_REG (mode
, R0_REGNUM
);
6997 aapcs_libcall_value (machine_mode mode
)
6999 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
7000 && GET_MODE_SIZE (mode
) <= 4)
7003 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
7006 /* Lay out a function argument using the AAPCS rules. The rule
7007 numbers referred to here are those in the AAPCS. */
7009 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
7010 const_tree type
, bool named
)
7015 /* We only need to do this once per argument. */
7016 if (pcum
->aapcs_arg_processed
)
7019 pcum
->aapcs_arg_processed
= true;
7021 /* Special case: if named is false then we are handling an incoming
7022 anonymous argument which is on the stack. */
7026 /* Is this a potential co-processor register candidate? */
7027 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7029 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
7030 pcum
->aapcs_cprc_slot
= slot
;
7032 /* We don't have to apply any of the rules from part B of the
7033 preparation phase, these are handled elsewhere in the
7038 /* A Co-processor register candidate goes either in its own
7039 class of registers or on the stack. */
7040 if (!pcum
->aapcs_cprc_failed
[slot
])
7042 /* C1.cp - Try to allocate the argument to co-processor
7044 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
7047 /* C2.cp - Put the argument on the stack and note that we
7048 can't assign any more candidates in this slot. We also
7049 need to note that we have allocated stack space, so that
7050 we won't later try to split a non-cprc candidate between
7051 core registers and the stack. */
7052 pcum
->aapcs_cprc_failed
[slot
] = true;
7053 pcum
->can_split
= false;
7056 /* We didn't get a register, so this argument goes on the
7058 gcc_assert (pcum
->can_split
== false);
7063 /* C3 - For double-word aligned arguments, round the NCRN up to the
7064 next even number. */
7065 ncrn
= pcum
->aapcs_ncrn
;
7068 int res
= arm_needs_doubleword_align (mode
, type
);
7069 /* Only warn during RTL expansion of call stmts, otherwise we would
7070 warn e.g. during gimplification even on functions that will be
7071 always inlined, and we'd warn multiple times. Don't warn when
7072 called in expand_function_start either, as we warn instead in
7073 arm_function_arg_boundary in that case. */
7074 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
7075 inform (input_location
, "parameter passing for argument of type "
7076 "%qT changed in GCC 7.1", type
);
7081 nregs
= ARM_NUM_REGS2(mode
, type
);
7083 /* Sigh, this test should really assert that nregs > 0, but a GCC
7084 extension allows empty structs and then gives them empty size; it
7085 then allows such a structure to be passed by value. For some of
7086 the code below we have to pretend that such an argument has
7087 non-zero size so that we 'locate' it correctly either in
7088 registers or on the stack. */
7089 gcc_assert (nregs
>= 0);
7091 nregs2
= nregs
? nregs
: 1;
7093 /* C4 - Argument fits entirely in core registers. */
7094 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
7096 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7097 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
7101 /* C5 - Some core registers left and there are no arguments already
7102 on the stack: split this argument between the remaining core
7103 registers and the stack. */
7104 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
7106 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7107 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7108 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
7112 /* C6 - NCRN is set to 4. */
7113 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7115 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
7119 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7120 for a call to a function whose data type is FNTYPE.
7121 For a library call, FNTYPE is NULL. */
7123 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
7125 tree fndecl ATTRIBUTE_UNUSED
)
7127 /* Long call handling. */
7129 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
7131 pcum
->pcs_variant
= arm_pcs_default
;
7133 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7135 if (arm_libcall_uses_aapcs_base (libname
))
7136 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
7138 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
7139 pcum
->aapcs_reg
= NULL_RTX
;
7140 pcum
->aapcs_partial
= 0;
7141 pcum
->aapcs_arg_processed
= false;
7142 pcum
->aapcs_cprc_slot
= -1;
7143 pcum
->can_split
= true;
7145 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7149 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
7151 pcum
->aapcs_cprc_failed
[i
] = false;
7152 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
7160 /* On the ARM, the offset starts at 0. */
7162 pcum
->iwmmxt_nregs
= 0;
7163 pcum
->can_split
= true;
7165 /* Varargs vectors are treated the same as long long.
7166 named_count avoids having to change the way arm handles 'named' */
7167 pcum
->named_count
= 0;
7170 if (TARGET_REALLY_IWMMXT
&& fntype
)
7174 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
7176 fn_arg
= TREE_CHAIN (fn_arg
))
7177 pcum
->named_count
+= 1;
7179 if (! pcum
->named_count
)
7180 pcum
->named_count
= INT_MAX
;
7184 /* Return 2 if double word alignment is required for argument passing,
7185 but wasn't required before the fix for PR88469.
7186 Return 1 if double word alignment is required for argument passing.
7187 Return -1 if double word alignment used to be required for argument
7188 passing before PR77728 ABI fix, but is not required anymore.
7189 Return 0 if double word alignment is not required and wasn't requried
7192 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
7195 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
7197 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7198 if (!AGGREGATE_TYPE_P (type
))
7199 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
7201 /* Array types: Use member alignment of element type. */
7202 if (TREE_CODE (type
) == ARRAY_TYPE
)
7203 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
7207 /* Record/aggregate types: Use greatest member alignment of any member.
7209 Note that we explicitly consider zero-sized fields here, even though
7210 they don't map to AAPCS machine types. For example, in:
7212 struct __attribute__((aligned(8))) empty {};
7215 [[no_unique_address]] empty e;
7219 "s" contains only one Fundamental Data Type (the int field)
7220 but gains 8-byte alignment and size thanks to "e". */
7221 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7222 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
7224 if (TREE_CODE (field
) == FIELD_DECL
)
7227 /* Before PR77728 fix, we were incorrectly considering also
7228 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7229 Make sure we can warn about that with -Wpsabi. */
7232 else if (TREE_CODE (field
) == FIELD_DECL
7233 && DECL_BIT_FIELD_TYPE (field
)
7234 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
7244 /* Determine where to put an argument to a function.
7245 Value is zero to push the argument on the stack,
7246 or a hard register in which to store the argument.
7248 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7249 the preceding args and about the function being called.
7250 ARG is a description of the argument.
7252 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7253 other arguments are passed on the stack. If (NAMED == 0) (which happens
7254 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7255 defined), say it is passed in the stack (function_prologue will
7256 indeed make it pass in the stack if necessary). */
7259 arm_function_arg (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7261 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7264 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7265 a call insn (op3 of a call_value insn). */
7266 if (arg
.end_marker_p ())
7269 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7271 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7272 return pcum
->aapcs_reg
;
7275 /* Varargs vectors are treated the same as long long.
7276 named_count avoids having to change the way arm handles 'named' */
7277 if (TARGET_IWMMXT_ABI
7278 && arm_vector_mode_supported_p (arg
.mode
)
7279 && pcum
->named_count
> pcum
->nargs
+ 1)
7281 if (pcum
->iwmmxt_nregs
<= 9)
7282 return gen_rtx_REG (arg
.mode
,
7283 pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
7286 pcum
->can_split
= false;
7291 /* Put doubleword aligned quantities in even register pairs. */
7292 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
7294 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
7295 if (res
< 0 && warn_psabi
)
7296 inform (input_location
, "parameter passing for argument of type "
7297 "%qT changed in GCC 7.1", arg
.type
);
7301 if (res
> 1 && warn_psabi
)
7302 inform (input_location
, "parameter passing for argument of type "
7303 "%qT changed in GCC 9.1", arg
.type
);
7307 /* Only allow splitting an arg between regs and memory if all preceding
7308 args were allocated to regs. For args passed by reference we only count
7309 the reference pointer. */
7310 if (pcum
->can_split
)
7313 nregs
= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7315 if (!arg
.named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
7318 return gen_rtx_REG (arg
.mode
, pcum
->nregs
);
7322 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
7324 if (!ARM_DOUBLEWORD_ALIGN
)
7325 return PARM_BOUNDARY
;
7327 int res
= arm_needs_doubleword_align (mode
, type
);
7328 if (res
< 0 && warn_psabi
)
7329 inform (input_location
, "parameter passing for argument of type %qT "
7330 "changed in GCC 7.1", type
);
7331 if (res
> 1 && warn_psabi
)
7332 inform (input_location
, "parameter passing for argument of type "
7333 "%qT changed in GCC 9.1", type
);
7335 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
7339 arm_arg_partial_bytes (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7341 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7342 int nregs
= pcum
->nregs
;
7344 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7346 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7347 return pcum
->aapcs_partial
;
7350 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (arg
.mode
))
7353 if (NUM_ARG_REGS
> nregs
7354 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (arg
.mode
, arg
.type
))
7356 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
7361 /* Update the data in PCUM to advance over argument ARG. */
7364 arm_function_arg_advance (cumulative_args_t pcum_v
,
7365 const function_arg_info
&arg
)
7367 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7369 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7371 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7373 if (pcum
->aapcs_cprc_slot
>= 0)
7375 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, arg
.mode
,
7377 pcum
->aapcs_cprc_slot
= -1;
7380 /* Generic stuff. */
7381 pcum
->aapcs_arg_processed
= false;
7382 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
7383 pcum
->aapcs_reg
= NULL_RTX
;
7384 pcum
->aapcs_partial
= 0;
7389 if (arm_vector_mode_supported_p (arg
.mode
)
7390 && pcum
->named_count
> pcum
->nargs
7391 && TARGET_IWMMXT_ABI
)
7392 pcum
->iwmmxt_nregs
+= 1;
7394 pcum
->nregs
+= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7398 /* Variable sized types are passed by reference. This is a GCC
7399 extension to the ARM ABI. */
7402 arm_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
7404 return arg
.type
&& TREE_CODE (TYPE_SIZE (arg
.type
)) != INTEGER_CST
;
7407 /* Encode the current state of the #pragma [no_]long_calls. */
7410 OFF
, /* No #pragma [no_]long_calls is in effect. */
7411 LONG
, /* #pragma long_calls is in effect. */
7412 SHORT
/* #pragma no_long_calls is in effect. */
7415 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
7418 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7420 arm_pragma_long_calls
= LONG
;
7424 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7426 arm_pragma_long_calls
= SHORT
;
7430 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7432 arm_pragma_long_calls
= OFF
;
7435 /* Handle an attribute requiring a FUNCTION_DECL;
7436 arguments as in struct attribute_spec.handler. */
7438 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
7439 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7441 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7443 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7445 *no_add_attrs
= true;
7451 /* Handle an "interrupt" or "isr" attribute;
7452 arguments as in struct attribute_spec.handler. */
7454 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
7459 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7461 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7463 *no_add_attrs
= true;
7465 else if (TARGET_VFP_BASE
)
7467 warning (OPT_Wattributes
, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7470 /* FIXME: the argument if any is checked for type attributes;
7471 should it be checked for decl ones? */
7475 if (FUNC_OR_METHOD_TYPE_P (*node
))
7477 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
7479 warning (OPT_Wattributes
, "%qE attribute ignored",
7481 *no_add_attrs
= true;
7484 else if (TREE_CODE (*node
) == POINTER_TYPE
7485 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node
))
7486 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
7488 *node
= build_variant_type_copy (*node
);
7489 TREE_TYPE (*node
) = build_type_attribute_variant
7491 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
7492 *no_add_attrs
= true;
7496 /* Possibly pass this attribute on from the type to a decl. */
7497 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
7498 | (int) ATTR_FLAG_FUNCTION_NEXT
7499 | (int) ATTR_FLAG_ARRAY_NEXT
))
7501 *no_add_attrs
= true;
7502 return tree_cons (name
, args
, NULL_TREE
);
7506 warning (OPT_Wattributes
, "%qE attribute ignored",
7515 /* Handle a "pcs" attribute; arguments as in struct
7516 attribute_spec.handler. */
7518 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
7519 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7521 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
7523 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
7524 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
7554 /* This function returns true if a function with declaration FNDECL and type
7555 FNTYPE uses the stack to pass arguments or return variables and false
7556 otherwise. This is used for functions with the attributes
7557 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7558 diagnostic messages if the stack is used. NAME is the name of the attribute
7562 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
7564 function_args_iterator args_iter
;
7565 CUMULATIVE_ARGS args_so_far_v
;
7566 cumulative_args_t args_so_far
;
7567 bool first_param
= true;
7568 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
7570 /* Error out if any argument is passed on the stack. */
7571 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
7572 args_so_far
= pack_cumulative_args (&args_so_far_v
);
7573 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
7577 prev_arg_type
= arg_type
;
7578 if (VOID_TYPE_P (arg_type
))
7581 function_arg_info
arg (arg_type
, /*named=*/true);
7583 /* ??? We should advance after processing the argument and pass
7584 the argument we're advancing past. */
7585 arm_function_arg_advance (args_so_far
, arg
);
7586 arg_rtx
= arm_function_arg (args_so_far
, arg
);
7587 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7589 error ("%qE attribute not available to functions with arguments "
7590 "passed on the stack", name
);
7593 first_param
= false;
7596 /* Error out for variadic functions since we cannot control how many
7597 arguments will be passed and thus stack could be used. stdarg_p () is not
7598 used for the checking to avoid browsing arguments twice. */
7599 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7601 error ("%qE attribute not available to functions with variable number "
7602 "of arguments", name
);
7606 /* Error out if return value is passed on the stack. */
7607 ret_type
= TREE_TYPE (fntype
);
7608 if (arm_return_in_memory (ret_type
, fntype
))
7610 error ("%qE attribute not available to functions that return value on "
7617 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7618 function will check whether the attribute is allowed here and will add the
7619 attribute to the function declaration tree or otherwise issue a warning. */
7622 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7631 *no_add_attrs
= true;
7632 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7637 /* Ignore attribute for function types. */
7638 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7640 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7642 *no_add_attrs
= true;
7648 /* Warn for static linkage functions. */
7649 if (!TREE_PUBLIC (fndecl
))
7651 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7652 "with static linkage", name
);
7653 *no_add_attrs
= true;
7657 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7658 TREE_TYPE (fndecl
));
7663 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7664 function will check whether the attribute is allowed here and will add the
7665 attribute to the function type tree or otherwise issue a diagnostic. The
7666 reason we check this at declaration time is to only allow the use of the
7667 attribute with declarations of function pointers and not function
7668 declarations. This function checks NODE is of the expected type and issues
7669 diagnostics otherwise using NAME. If it is not of the expected type
7670 *NO_ADD_ATTRS will be set to true. */
7673 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7678 tree decl
= NULL_TREE
;
7683 *no_add_attrs
= true;
7684 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7691 fntype
= TREE_TYPE (*node
);
7693 if (VAR_P (*node
) || TREE_CODE (*node
) == TYPE_DECL
)
7699 while (fntype
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7700 fntype
= TREE_TYPE (fntype
);
7702 if ((DECL_P (*node
) && !decl
) || TREE_CODE (fntype
) != FUNCTION_TYPE
)
7704 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7705 "function pointer", name
);
7706 *no_add_attrs
= true;
7710 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7715 /* Prevent trees being shared among function types with and without
7716 cmse_nonsecure_call attribute. */
7719 type
= build_distinct_type_copy (TREE_TYPE (decl
));
7720 TREE_TYPE (decl
) = type
;
7724 type
= build_distinct_type_copy (*node
);
7730 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7733 fntype
= TREE_TYPE (fntype
);
7734 fntype
= build_distinct_type_copy (fntype
);
7735 TREE_TYPE (type
) = fntype
;
7738 /* Construct a type attribute and add it to the function type. */
7739 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7740 TYPE_ATTRIBUTES (fntype
));
7741 TYPE_ATTRIBUTES (fntype
) = attrs
;
7745 /* Return 0 if the attributes for two types are incompatible, 1 if they
7746 are compatible, and 2 if they are nearly compatible (which causes a
7747 warning to be generated). */
7749 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7753 tree attrs1
= lookup_attribute ("Advanced SIMD type",
7754 TYPE_ATTRIBUTES (type1
));
7755 tree attrs2
= lookup_attribute ("Advanced SIMD type",
7756 TYPE_ATTRIBUTES (type2
));
7757 if (bool (attrs1
) != bool (attrs2
))
7759 if (attrs1
&& !attribute_value_equal (attrs1
, attrs2
))
7762 /* Check for mismatch of non-default calling convention. */
7763 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7766 /* Check for mismatched call attributes. */
7767 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7768 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7769 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7770 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7772 /* Only bother to check if an attribute is defined. */
7773 if (l1
| l2
| s1
| s2
)
7775 /* If one type has an attribute, the other must have the same attribute. */
7776 if ((l1
!= l2
) || (s1
!= s2
))
7779 /* Disallow mixed attributes. */
7780 if ((l1
& s2
) || (l2
& s1
))
7784 /* Check for mismatched ISR attribute. */
7785 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7787 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7788 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7790 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7794 l1
= lookup_attribute ("cmse_nonsecure_call",
7795 TYPE_ATTRIBUTES (type1
)) != NULL
;
7796 l2
= lookup_attribute ("cmse_nonsecure_call",
7797 TYPE_ATTRIBUTES (type2
)) != NULL
;
7805 /* Assigns default attributes to newly defined type. This is used to
7806 set short_call/long_call attributes for function types of
7807 functions defined inside corresponding #pragma scopes. */
7809 arm_set_default_type_attributes (tree type
)
7811 /* Add __attribute__ ((long_call)) to all functions, when
7812 inside #pragma long_calls or __attribute__ ((short_call)),
7813 when inside #pragma no_long_calls. */
7814 if (FUNC_OR_METHOD_TYPE_P (type
))
7816 tree type_attr_list
, attr_name
;
7817 type_attr_list
= TYPE_ATTRIBUTES (type
);
7819 if (arm_pragma_long_calls
== LONG
)
7820 attr_name
= get_identifier ("long_call");
7821 else if (arm_pragma_long_calls
== SHORT
)
7822 attr_name
= get_identifier ("short_call");
7826 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7827 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7831 /* Return true if DECL is known to be linked into section SECTION. */
7834 arm_function_in_section_p (tree decl
, section
*section
)
7836 /* We can only be certain about the prevailing symbol definition. */
7837 if (!decl_binds_to_current_def_p (decl
))
7840 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7841 if (!DECL_SECTION_NAME (decl
))
7843 /* Make sure that we will not create a unique section for DECL. */
7844 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7848 return function_section (decl
) == section
;
7851 /* Return nonzero if a 32-bit "long_call" should be generated for
7852 a call from the current function to DECL. We generate a long_call
7855 a. has an __attribute__((long call))
7856 or b. is within the scope of a #pragma long_calls
7857 or c. the -mlong-calls command line switch has been specified
7859 However we do not generate a long call if the function:
7861 d. has an __attribute__ ((short_call))
7862 or e. is inside the scope of a #pragma no_long_calls
7863 or f. is defined in the same section as the current function. */
7866 arm_is_long_call_p (tree decl
)
7871 return TARGET_LONG_CALLS
;
7873 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7874 if (lookup_attribute ("short_call", attrs
))
7877 /* For "f", be conservative, and only cater for cases in which the
7878 whole of the current function is placed in the same section. */
7879 if (!flag_reorder_blocks_and_partition
7880 && TREE_CODE (decl
) == FUNCTION_DECL
7881 && arm_function_in_section_p (decl
, current_function_section ()))
7884 if (lookup_attribute ("long_call", attrs
))
7887 return TARGET_LONG_CALLS
;
7890 /* Return nonzero if it is ok to make a tail-call to DECL. */
7892 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7894 unsigned long func_type
;
7896 if (cfun
->machine
->sibcall_blocked
)
7901 /* In FDPIC, never tailcall something for which we have no decl:
7902 the target function could be in a different module, requiring
7903 a different FDPIC register value. */
7908 /* Never tailcall something if we are generating code for Thumb-1. */
7912 /* The PIC register is live on entry to VxWorks PLT entries, so we
7913 must make the call before restoring the PIC register. */
7914 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7917 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7918 may be used both as target of the call and base register for restoring
7919 the VFP registers */
7920 if (TARGET_APCS_FRAME
&& TARGET_ARM
7921 && TARGET_HARD_FLOAT
7922 && decl
&& arm_is_long_call_p (decl
))
7925 /* If we are interworking and the function is not declared static
7926 then we can't tail-call it unless we know that it exists in this
7927 compilation unit (since it might be a Thumb routine). */
7928 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7929 && !TREE_ASM_WRITTEN (decl
))
7932 func_type
= arm_current_func_type ();
7933 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7934 if (IS_INTERRUPT (func_type
))
7937 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7938 generated for entry functions themselves. */
7939 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7942 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7943 this would complicate matters for later code generation. */
7944 if (TREE_CODE (exp
) == CALL_EXPR
)
7946 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7947 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7951 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7953 /* Check that the return value locations are the same. For
7954 example that we aren't returning a value from the sibling in
7955 a VFP register but then need to transfer it to a core
7958 tree decl_or_type
= decl
;
7960 /* If it is an indirect function pointer, get the function type. */
7962 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7964 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7965 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7967 if (!rtx_equal_p (a
, b
))
7971 /* Never tailcall if function may be called with a misaligned SP. */
7972 if (IS_STACKALIGN (func_type
))
7975 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7976 references should become a NOP. Don't convert such calls into
7978 if (TARGET_AAPCS_BASED
7979 && arm_abi
== ARM_ABI_AAPCS
7981 && DECL_WEAK (decl
))
7984 /* We cannot do a tailcall for an indirect call by descriptor if all the
7985 argument registers are used because the only register left to load the
7986 address is IP and it will already contain the static chain. */
7987 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7989 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7990 CUMULATIVE_ARGS cum
;
7991 cumulative_args_t cum_v
;
7993 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7994 cum_v
= pack_cumulative_args (&cum
);
7996 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7998 tree type
= TREE_VALUE (t
);
7999 if (!VOID_TYPE_P (type
))
8001 function_arg_info
arg (type
, /*named=*/true);
8002 arm_function_arg_advance (cum_v
, arg
);
8006 function_arg_info
arg (integer_type_node
, /*named=*/true);
8007 if (!arm_function_arg (cum_v
, arg
))
8011 /* Everything else is ok. */
8016 /* Addressing mode support functions. */
8018 /* Return nonzero if X is a legitimate immediate operand when compiling
8019 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8021 legitimate_pic_operand_p (rtx x
)
8023 if (SYMBOL_REF_P (x
)
8024 || (GET_CODE (x
) == CONST
8025 && GET_CODE (XEXP (x
, 0)) == PLUS
8026 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
8032 /* Record that the current function needs a PIC register. If PIC_REG is null,
8033 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8034 both case cfun->machine->pic_reg is initialized if we have not already done
8035 so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
8036 PIC register is reloaded in the current position of the instruction stream
8037 irregardless of whether it was loaded before. Otherwise, it is only loaded
8038 if not already done so (crtl->uses_pic_offset_table is null). Note that
8039 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8040 is only supported iff COMPUTE_NOW is false. */
8043 require_pic_register (rtx pic_reg
, bool compute_now
)
8045 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8047 /* A lot of the logic here is made obscure by the fact that this
8048 routine gets called as part of the rtx cost estimation process.
8049 We don't want those calls to affect any assumptions about the real
8050 function; and further, we can't call entry_of_function() until we
8051 start the real expansion process. */
8052 if (!crtl
->uses_pic_offset_table
|| compute_now
)
8054 gcc_assert (can_create_pseudo_p ()
8055 || (pic_reg
!= NULL_RTX
8057 && GET_MODE (pic_reg
) == Pmode
));
8058 if (arm_pic_register
!= INVALID_REGNUM
8060 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
8062 if (!cfun
->machine
->pic_reg
)
8063 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
8065 /* Play games to avoid marking the function as needing pic
8066 if we are being called as part of the cost-estimation
8068 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
8069 crtl
->uses_pic_offset_table
= 1;
8073 rtx_insn
*seq
, *insn
;
8075 if (pic_reg
== NULL_RTX
)
8076 pic_reg
= gen_reg_rtx (Pmode
);
8077 if (!cfun
->machine
->pic_reg
)
8078 cfun
->machine
->pic_reg
= pic_reg
;
8080 /* Play games to avoid marking the function as needing pic
8081 if we are being called as part of the cost-estimation
8083 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
8085 crtl
->uses_pic_offset_table
= 1;
8088 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
8089 && arm_pic_register
> LAST_LO_REGNUM
8091 emit_move_insn (cfun
->machine
->pic_reg
,
8092 gen_rtx_REG (Pmode
, arm_pic_register
));
8094 arm_load_pic_register (0UL, pic_reg
);
8099 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
8101 INSN_LOCATION (insn
) = prologue_location
;
8103 /* We can be called during expansion of PHI nodes, where
8104 we can't yet emit instructions directly in the final
8105 insn stream. Queue the insns on the entry edge, they will
8106 be committed after everything else is expanded. */
8107 if (currently_expanding_to_rtl
)
8108 insert_insn_on_edge (seq
,
8110 (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
8118 /* Generate insns to calculate the address of ORIG in pic mode. */
8120 calculate_pic_address_constant (rtx reg
, rtx pic_reg
, rtx orig
)
8125 pat
= gen_calculate_pic_address (reg
, pic_reg
, orig
);
8127 /* Make the MEM as close to a constant as possible. */
8128 mem
= SET_SRC (pat
);
8129 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
8130 MEM_READONLY_P (mem
) = 1;
8131 MEM_NOTRAP_P (mem
) = 1;
8133 return emit_insn (pat
);
8136 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8137 created to hold the result of the load. If not NULL, PIC_REG indicates
8138 which register to use as PIC register, otherwise it is decided by register
8139 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8140 location in the instruction stream, irregardless of whether it was loaded
8141 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8142 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8144 Returns the register REG into which the PIC load is performed. */
8147 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
8150 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8152 if (SYMBOL_REF_P (orig
)
8153 || LABEL_REF_P (orig
))
8157 gcc_assert (can_create_pseudo_p ());
8158 reg
= gen_reg_rtx (Pmode
);
8161 /* VxWorks does not impose a fixed gap between segments; the run-time
8162 gap can be different from the object-file gap. We therefore can't
8163 use GOTOFF unless we are absolutely sure that the symbol is in the
8164 same segment as the GOT. Unfortunately, the flexibility of linker
8165 scripts means that we can't be sure of that in general, so assume
8166 that GOTOFF is never valid on VxWorks. */
8167 /* References to weak symbols cannot be resolved locally: they
8168 may be overridden by a non-weak definition at link time. */
8170 if ((LABEL_REF_P (orig
)
8171 || (SYMBOL_REF_P (orig
)
8172 && SYMBOL_REF_LOCAL_P (orig
)
8173 && (SYMBOL_REF_DECL (orig
)
8174 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)
8175 && (!SYMBOL_REF_FUNCTION_P (orig
)
8176 || arm_fdpic_local_funcdesc_p (orig
))))
8178 && arm_pic_data_is_text_relative
)
8179 insn
= arm_pic_static_addr (orig
, reg
);
8182 /* If this function doesn't have a pic register, create one now. */
8183 require_pic_register (pic_reg
, compute_now
);
8185 if (pic_reg
== NULL_RTX
)
8186 pic_reg
= cfun
->machine
->pic_reg
;
8188 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8191 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8193 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
8197 else if (GET_CODE (orig
) == CONST
)
8201 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8202 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
8205 /* Handle the case where we have: const (UNSPEC_TLS). */
8206 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
8207 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
8210 /* Handle the case where we have:
8211 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8213 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8214 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
8215 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
8217 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
8223 gcc_assert (can_create_pseudo_p ());
8224 reg
= gen_reg_rtx (Pmode
);
8227 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
8229 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
8230 pic_reg
, compute_now
);
8231 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
8232 base
== reg
? 0 : reg
, pic_reg
,
8235 if (CONST_INT_P (offset
))
8237 /* The base register doesn't really matter, we only want to
8238 test the index for the appropriate mode. */
8239 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
8241 gcc_assert (can_create_pseudo_p ());
8242 offset
= force_reg (Pmode
, offset
);
8245 if (CONST_INT_P (offset
))
8246 return plus_constant (Pmode
, base
, INTVAL (offset
));
8249 if (GET_MODE_SIZE (mode
) > 4
8250 && (GET_MODE_CLASS (mode
) == MODE_INT
8251 || TARGET_SOFT_FLOAT
))
8253 emit_insn (gen_addsi3 (reg
, base
, offset
));
8257 return gen_rtx_PLUS (Pmode
, base
, offset
);
8264 /* Generate insns that produce the address of the stack canary */
8266 arm_stack_protect_tls_canary_mem (bool reload
)
8268 rtx tp
= gen_reg_rtx (SImode
);
8270 emit_insn (gen_reload_tp_hard (tp
));
8272 emit_insn (gen_load_tp_hard (tp
));
8274 rtx reg
= gen_reg_rtx (SImode
);
8275 rtx offset
= GEN_INT (arm_stack_protector_guard_offset
);
8276 emit_set_insn (reg
, gen_rtx_PLUS (SImode
, tp
, offset
));
8277 return gen_rtx_MEM (SImode
, reg
);
8281 /* Whether a register is callee saved or not. This is necessary because high
8282 registers are marked as caller saved when optimizing for size on Thumb-1
8283 targets despite being callee saved in order to avoid using them. */
8284 #define callee_saved_reg_p(reg) \
8285 (!call_used_or_fixed_reg_p (reg) \
8286 || (TARGET_THUMB1 && optimize_size \
8287 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8289 /* Return a mask for the call-clobbered low registers that are unused
8290 at the end of the prologue. */
8291 static unsigned long
8292 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8294 unsigned long mask
= 0;
8295 bitmap prologue_live_out
= df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
8297 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8298 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (prologue_live_out
, reg
))
8299 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
8303 /* Similarly for the start of the epilogue. */
8304 static unsigned long
8305 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8307 unsigned long mask
= 0;
8308 bitmap epilogue_live_in
= df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun
));
8310 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8311 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (epilogue_live_in
, reg
))
8312 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
8316 /* Find a spare register to use during the prolog of a function. */
8319 thumb_find_work_register (unsigned long pushed_regs_mask
)
8323 unsigned long unused_regs
8324 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8326 /* Check the argument registers first as these are call-used. The
8327 register allocation order means that sometimes r3 might be used
8328 but earlier argument registers might not, so check them all. */
8329 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
8330 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
8333 /* Otherwise look for a call-saved register that is going to be pushed. */
8334 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
8335 if (pushed_regs_mask
& (1 << reg
))
8340 /* Thumb-2 can use high regs. */
8341 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
8342 if (pushed_regs_mask
& (1 << reg
))
8345 /* Something went wrong - thumb_compute_save_reg_mask()
8346 should have arranged for a suitable register to be pushed. */
8350 static GTY(()) int pic_labelno
;
8352 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8356 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
8358 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
8360 if (crtl
->uses_pic_offset_table
== 0
8361 || TARGET_SINGLE_PIC_BASE
8365 gcc_assert (flag_pic
);
8367 if (pic_reg
== NULL_RTX
)
8368 pic_reg
= cfun
->machine
->pic_reg
;
8369 if (TARGET_VXWORKS_RTP
)
8371 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
8372 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8373 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
8375 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
8377 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8378 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
8382 /* We use an UNSPEC rather than a LABEL_REF because this label
8383 never appears in the code stream. */
8385 labelno
= GEN_INT (pic_labelno
++);
8386 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8387 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8389 /* On the ARM the PC register contains 'dot + 8' at the time of the
8390 addition, on the Thumb it is 'dot + 4'. */
8391 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8392 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
8394 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8398 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8400 else /* TARGET_THUMB1 */
8402 if (arm_pic_register
!= INVALID_REGNUM
8403 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
8405 /* We will have pushed the pic register, so we should always be
8406 able to find a work register. */
8407 pic_tmp
= gen_rtx_REG (SImode
,
8408 thumb_find_work_register (saved_regs
));
8409 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
8410 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
8411 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
8413 else if (arm_pic_register
!= INVALID_REGNUM
8414 && arm_pic_register
> LAST_LO_REGNUM
8415 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
8417 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8418 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
8419 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
8422 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8426 /* Need to emit this whether or not we obey regdecls,
8427 since setjmp/longjmp can cause life info to screw up. */
8431 /* Try to determine whether an object, referenced via ORIG, will be
8432 placed in the text or data segment. This is used in FDPIC mode, to
8433 decide which relocations to use when accessing ORIG. *IS_READONLY
8434 is set to true if ORIG is a read-only location, false otherwise.
8435 Return true if we could determine the location of ORIG, false
8436 otherwise. *IS_READONLY is valid only when we return true. */
8438 arm_is_segment_info_known (rtx orig
, bool *is_readonly
)
8440 *is_readonly
= false;
8442 if (LABEL_REF_P (orig
))
8444 *is_readonly
= true;
8448 if (SYMBOL_REF_P (orig
))
8450 if (CONSTANT_POOL_ADDRESS_P (orig
))
8452 *is_readonly
= true;
8455 if (SYMBOL_REF_LOCAL_P (orig
)
8456 && !SYMBOL_REF_EXTERNAL_P (orig
)
8457 && SYMBOL_REF_DECL (orig
)
8458 && (!DECL_P (SYMBOL_REF_DECL (orig
))
8459 || !DECL_COMMON (SYMBOL_REF_DECL (orig
))))
8461 tree decl
= SYMBOL_REF_DECL (orig
);
8462 tree init
= VAR_P (decl
)
8463 ? DECL_INITIAL (decl
) : (TREE_CODE (decl
) == CONSTRUCTOR
)
8466 bool named_section
, readonly
;
8468 if (init
&& init
!= error_mark_node
)
8469 reloc
= compute_reloc_for_constant (init
);
8471 named_section
= VAR_P (decl
)
8472 && lookup_attribute ("section", DECL_ATTRIBUTES (decl
));
8473 readonly
= decl_readonly_section (decl
, reloc
);
8475 /* We don't know where the link script will put a named
8476 section, so return false in such a case. */
8480 *is_readonly
= readonly
;
8484 /* We don't know. */
8491 /* Generate code to load the address of a static var when flag_pic is set. */
8493 arm_pic_static_addr (rtx orig
, rtx reg
)
8495 rtx l1
, labelno
, offset_rtx
;
8498 gcc_assert (flag_pic
);
8500 bool is_readonly
= false;
8501 bool info_known
= false;
8504 && SYMBOL_REF_P (orig
)
8505 && !SYMBOL_REF_FUNCTION_P (orig
))
8506 info_known
= arm_is_segment_info_known (orig
, &is_readonly
);
8509 && SYMBOL_REF_P (orig
)
8510 && !SYMBOL_REF_FUNCTION_P (orig
)
8513 /* We don't know where orig is stored, so we have be
8514 pessimistic and use a GOT relocation. */
8515 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8517 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8519 else if (TARGET_FDPIC
8520 && SYMBOL_REF_P (orig
)
8521 && (SYMBOL_REF_FUNCTION_P (orig
)
8524 /* We use the GOTOFF relocation. */
8525 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8527 rtx l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig
), UNSPEC_PIC_SYM
);
8528 emit_insn (gen_movsi (reg
, l1
));
8529 insn
= emit_insn (gen_addsi3 (reg
, reg
, pic_reg
));
8533 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8534 PC-relative access. */
8535 /* We use an UNSPEC rather than a LABEL_REF because this label
8536 never appears in the code stream. */
8537 labelno
= GEN_INT (pic_labelno
++);
8538 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8539 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8541 /* On the ARM the PC register contains 'dot + 8' at the time of the
8542 addition, on the Thumb it is 'dot + 4'. */
8543 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8544 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
8545 UNSPEC_SYMBOL_OFFSET
);
8546 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
8548 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
,
8555 /* Return nonzero if X is valid as an ARM state addressing register. */
8557 arm_address_register_rtx_p (rtx x
, int strict_p
)
8567 return ARM_REGNO_OK_FOR_BASE_P (regno
);
8569 return (regno
<= LAST_ARM_REGNUM
8570 || regno
>= FIRST_PSEUDO_REGISTER
8571 || regno
== FRAME_POINTER_REGNUM
8572 || regno
== ARG_POINTER_REGNUM
);
8575 /* Return TRUE if this rtx is the difference of a symbol and a label,
8576 and will reduce to a PC-relative relocation in the object file.
8577 Expressions like this can be left alone when generating PIC, rather
8578 than forced through the GOT. */
8580 pcrel_constant_p (rtx x
)
8582 if (GET_CODE (x
) == MINUS
)
8583 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
8588 /* Return true if X will surely end up in an index register after next
8591 will_be_in_index_register (const_rtx x
)
8593 /* arm.md: calculate_pic_address will split this into a register. */
8594 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
8597 /* Return nonzero if X is a valid ARM state address operand. */
8599 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
8603 enum rtx_code code
= GET_CODE (x
);
8605 if (arm_address_register_rtx_p (x
, strict_p
))
8608 use_ldrd
= (TARGET_LDRD
8609 && (mode
== DImode
|| mode
== DFmode
));
8611 if (code
== POST_INC
|| code
== PRE_DEC
8612 || ((code
== PRE_INC
|| code
== POST_DEC
)
8613 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8614 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8616 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8617 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8618 && GET_CODE (XEXP (x
, 1)) == PLUS
8619 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8621 rtx addend
= XEXP (XEXP (x
, 1), 1);
8623 /* Don't allow ldrd post increment by register because it's hard
8624 to fixup invalid register choices. */
8626 && GET_CODE (x
) == POST_MODIFY
8630 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
8631 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
8634 /* After reload constants split into minipools will have addresses
8635 from a LABEL_REF. */
8636 else if (reload_completed
8637 && (code
== LABEL_REF
8639 && GET_CODE (XEXP (x
, 0)) == PLUS
8640 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8641 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8644 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8647 else if (code
== PLUS
)
8649 rtx xop0
= XEXP (x
, 0);
8650 rtx xop1
= XEXP (x
, 1);
8652 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8653 && ((CONST_INT_P (xop1
)
8654 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
8655 || (!strict_p
&& will_be_in_index_register (xop1
))))
8656 || (arm_address_register_rtx_p (xop1
, strict_p
)
8657 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
8661 /* Reload currently can't handle MINUS, so disable this for now */
8662 else if (GET_CODE (x
) == MINUS
)
8664 rtx xop0
= XEXP (x
, 0);
8665 rtx xop1
= XEXP (x
, 1);
8667 return (arm_address_register_rtx_p (xop0
, strict_p
)
8668 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
8672 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8673 && code
== SYMBOL_REF
8674 && CONSTANT_POOL_ADDRESS_P (x
)
8676 && symbol_mentioned_p (get_pool_constant (x
))
8677 && ! pcrel_constant_p (get_pool_constant (x
))))
8683 /* Return true if we can avoid creating a constant pool entry for x. */
8685 can_avoid_literal_pool_for_label_p (rtx x
)
8687 /* Normally we can assign constant values to target registers without
8688 the help of constant pool. But there are cases we have to use constant
8690 1) assign a label to register.
8691 2) sign-extend a 8bit value to 32bit and then assign to register.
8693 Constant pool access in format:
8694 (set (reg r0) (mem (symbol_ref (".LC0"))))
8695 will cause the use of literal pool (later in function arm_reorg).
8696 So here we mark such format as an invalid format, then the compiler
8697 will adjust it into:
8698 (set (reg r0) (symbol_ref (".LC0")))
8699 (set (reg r0) (mem (reg r0))).
8700 No extra register is required, and (mem (reg r0)) won't cause the use
8701 of literal pools. */
8702 if (arm_disable_literal_pool
&& SYMBOL_REF_P (x
)
8703 && CONSTANT_POOL_ADDRESS_P (x
))
8709 /* Return nonzero if X is a valid Thumb-2 address operand. */
8711 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8714 enum rtx_code code
= GET_CODE (x
);
8716 /* If we are dealing with a MVE predicate mode, then treat it as a HImode as
8717 can store and load it like any other 16-bit value. */
8718 if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE (mode
))
8721 if (TARGET_HAVE_MVE
&& VALID_MVE_MODE (mode
))
8722 return mve_vector_mem_operand (mode
, x
, strict_p
);
8724 if (arm_address_register_rtx_p (x
, strict_p
))
8727 use_ldrd
= (TARGET_LDRD
8728 && (mode
== DImode
|| mode
== DFmode
));
8730 if (code
== POST_INC
|| code
== PRE_DEC
8731 || ((code
== PRE_INC
|| code
== POST_DEC
)
8732 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8733 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8735 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8736 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8737 && GET_CODE (XEXP (x
, 1)) == PLUS
8738 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8740 /* Thumb-2 only has autoincrement by constant. */
8741 rtx addend
= XEXP (XEXP (x
, 1), 1);
8742 HOST_WIDE_INT offset
;
8744 if (!CONST_INT_P (addend
))
8747 offset
= INTVAL(addend
);
8748 if (GET_MODE_SIZE (mode
) <= 4)
8749 return (offset
> -256 && offset
< 256);
8751 return (use_ldrd
&& offset
> -1024 && offset
< 1024
8752 && (offset
& 3) == 0);
8755 /* After reload constants split into minipools will have addresses
8756 from a LABEL_REF. */
8757 else if (reload_completed
8758 && (code
== LABEL_REF
8760 && GET_CODE (XEXP (x
, 0)) == PLUS
8761 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8762 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8765 else if (mode
== TImode
8766 || (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8767 || (TARGET_HAVE_MVE
&& VALID_MVE_STRUCT_MODE (mode
)))
8770 else if (code
== PLUS
)
8772 rtx xop0
= XEXP (x
, 0);
8773 rtx xop1
= XEXP (x
, 1);
8775 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8776 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8777 || (!strict_p
&& will_be_in_index_register (xop1
))))
8778 || (arm_address_register_rtx_p (xop1
, strict_p
)
8779 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8782 else if (can_avoid_literal_pool_for_label_p (x
))
8785 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8786 && code
== SYMBOL_REF
8787 && CONSTANT_POOL_ADDRESS_P (x
)
8789 && symbol_mentioned_p (get_pool_constant (x
))
8790 && ! pcrel_constant_p (get_pool_constant (x
))))
8796 /* Return nonzero if INDEX is valid for an address index operand in
8799 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8802 HOST_WIDE_INT range
;
8803 enum rtx_code code
= GET_CODE (index
);
8805 /* Standard coprocessor addressing modes. */
8806 if (TARGET_HARD_FLOAT
8807 && (mode
== SFmode
|| mode
== DFmode
))
8808 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8809 && INTVAL (index
) > -1024
8810 && (INTVAL (index
) & 3) == 0);
8812 /* For quad modes, we restrict the constant offset to be slightly less
8813 than what the instruction format permits. We do this because for
8814 quad mode moves, we will actually decompose them into two separate
8815 double-mode reads or writes. INDEX must therefore be a valid
8816 (double-mode) offset and so should INDEX+8. */
8817 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8818 return (code
== CONST_INT
8819 && INTVAL (index
) < 1016
8820 && INTVAL (index
) > -1024
8821 && (INTVAL (index
) & 3) == 0);
8823 /* We have no such constraint on double mode offsets, so we permit the
8824 full range of the instruction format. */
8825 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8826 return (code
== CONST_INT
8827 && INTVAL (index
) < 1024
8828 && INTVAL (index
) > -1024
8829 && (INTVAL (index
) & 3) == 0);
8831 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8832 return (code
== CONST_INT
8833 && INTVAL (index
) < 1024
8834 && INTVAL (index
) > -1024
8835 && (INTVAL (index
) & 3) == 0);
8837 if (arm_address_register_rtx_p (index
, strict_p
)
8838 && (GET_MODE_SIZE (mode
) <= 4))
8841 if (mode
== DImode
|| mode
== DFmode
)
8843 if (code
== CONST_INT
)
8845 HOST_WIDE_INT val
= INTVAL (index
);
8847 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8848 If vldr is selected it uses arm_coproc_mem_operand. */
8850 return val
> -256 && val
< 256;
8852 return val
> -4096 && val
< 4092;
8855 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8858 if (GET_MODE_SIZE (mode
) <= 4
8862 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8866 rtx xiop0
= XEXP (index
, 0);
8867 rtx xiop1
= XEXP (index
, 1);
8869 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8870 && power_of_two_operand (xiop1
, SImode
))
8871 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8872 && power_of_two_operand (xiop0
, SImode
)));
8874 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8875 || code
== ASHIFT
|| code
== ROTATERT
)
8877 rtx op
= XEXP (index
, 1);
8879 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8882 && INTVAL (op
) <= 31);
8886 /* For ARM v4 we may be doing a sign-extend operation during the
8892 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8898 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8900 return (code
== CONST_INT
8901 && INTVAL (index
) < range
8902 && INTVAL (index
) > -range
);
8905 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8906 index operand. i.e. 1, 2, 4 or 8. */
8908 thumb2_index_mul_operand (rtx op
)
8912 if (!CONST_INT_P (op
))
8916 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8919 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8921 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8923 enum rtx_code code
= GET_CODE (index
);
8925 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8926 /* Standard coprocessor addressing modes. */
8928 && (mode
== SFmode
|| mode
== DFmode
))
8929 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8930 /* Thumb-2 allows only > -256 index range for it's core register
8931 load/stores. Since we allow SF/DF in core registers, we have
8932 to use the intersection between -256~4096 (core) and -1024~1024
8934 && INTVAL (index
) > -256
8935 && (INTVAL (index
) & 3) == 0);
8937 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8939 /* For DImode assume values will usually live in core regs
8940 and only allow LDRD addressing modes. */
8941 if (!TARGET_LDRD
|| mode
!= DImode
)
8942 return (code
== CONST_INT
8943 && INTVAL (index
) < 1024
8944 && INTVAL (index
) > -1024
8945 && (INTVAL (index
) & 3) == 0);
8948 /* For quad modes, we restrict the constant offset to be slightly less
8949 than what the instruction format permits. We do this because for
8950 quad mode moves, we will actually decompose them into two separate
8951 double-mode reads or writes. INDEX must therefore be a valid
8952 (double-mode) offset and so should INDEX+8. */
8953 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8954 return (code
== CONST_INT
8955 && INTVAL (index
) < 1016
8956 && INTVAL (index
) > -1024
8957 && (INTVAL (index
) & 3) == 0);
8959 /* We have no such constraint on double mode offsets, so we permit the
8960 full range of the instruction format. */
8961 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8962 return (code
== CONST_INT
8963 && INTVAL (index
) < 1024
8964 && INTVAL (index
) > -1024
8965 && (INTVAL (index
) & 3) == 0);
8967 if (arm_address_register_rtx_p (index
, strict_p
)
8968 && (GET_MODE_SIZE (mode
) <= 4))
8971 if (mode
== DImode
|| mode
== DFmode
)
8973 if (code
== CONST_INT
)
8975 HOST_WIDE_INT val
= INTVAL (index
);
8976 /* Thumb-2 ldrd only has reg+const addressing modes.
8977 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8978 If vldr is selected it uses arm_coproc_mem_operand. */
8980 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8982 return IN_RANGE (val
, -255, 4095 - 4);
8990 rtx xiop0
= XEXP (index
, 0);
8991 rtx xiop1
= XEXP (index
, 1);
8993 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8994 && thumb2_index_mul_operand (xiop1
))
8995 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8996 && thumb2_index_mul_operand (xiop0
)));
8998 else if (code
== ASHIFT
)
9000 rtx op
= XEXP (index
, 1);
9002 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
9005 && INTVAL (op
) <= 3);
9008 return (code
== CONST_INT
9009 && INTVAL (index
) < 4096
9010 && INTVAL (index
) > -256);
9013 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9015 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
9025 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
9027 return (regno
<= LAST_LO_REGNUM
9028 || regno
> LAST_VIRTUAL_REGISTER
9029 || regno
== FRAME_POINTER_REGNUM
9030 || (GET_MODE_SIZE (mode
) >= 4
9031 && (regno
== STACK_POINTER_REGNUM
9032 || regno
>= FIRST_PSEUDO_REGISTER
9033 || x
== hard_frame_pointer_rtx
9034 || x
== arg_pointer_rtx
)));
9037 /* Return nonzero if x is a legitimate index register. This is the case
9038 for any base register that can access a QImode object. */
9040 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
9042 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
9045 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9047 The AP may be eliminated to either the SP or the FP, so we use the
9048 least common denominator, e.g. SImode, and offsets from 0 to 64.
9050 ??? Verify whether the above is the right approach.
9052 ??? Also, the FP may be eliminated to the SP, so perhaps that
9053 needs special handling also.
9055 ??? Look at how the mips16 port solves this problem. It probably uses
9056 better ways to solve some of these problems.
9058 Although it is not incorrect, we don't accept QImode and HImode
9059 addresses based on the frame pointer or arg pointer until the
9060 reload pass starts. This is so that eliminating such addresses
9061 into stack based ones won't produce impossible code. */
9063 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
9065 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
9068 /* ??? Not clear if this is right. Experiment. */
9069 if (GET_MODE_SIZE (mode
) < 4
9070 && !(reload_in_progress
|| reload_completed
)
9071 && (reg_mentioned_p (frame_pointer_rtx
, x
)
9072 || reg_mentioned_p (arg_pointer_rtx
, x
)
9073 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
9074 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
9075 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
9076 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
9079 /* Accept any base register. SP only in SImode or larger. */
9080 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
9083 /* This is PC relative data before arm_reorg runs. */
9084 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
9086 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
9087 && !arm_disable_literal_pool
)
9090 /* This is PC relative data after arm_reorg runs. */
9091 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
9094 || (GET_CODE (x
) == CONST
9095 && GET_CODE (XEXP (x
, 0)) == PLUS
9096 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
9097 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
9100 /* Post-inc indexing only supported for SImode and larger. */
9101 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
9102 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
9105 else if (GET_CODE (x
) == PLUS
)
9107 /* REG+REG address can be any two index registers. */
9108 /* We disallow FRAME+REG addressing since we know that FRAME
9109 will be replaced with STACK, and SP relative addressing only
9110 permits SP+OFFSET. */
9111 if (GET_MODE_SIZE (mode
) <= 4
9112 && XEXP (x
, 0) != frame_pointer_rtx
9113 && XEXP (x
, 1) != frame_pointer_rtx
9114 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9115 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
9116 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
9119 /* REG+const has 5-7 bit offset for non-SP registers. */
9120 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9121 || XEXP (x
, 0) == arg_pointer_rtx
)
9122 && CONST_INT_P (XEXP (x
, 1))
9123 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
9126 /* REG+const has 10-bit offset for SP, but only SImode and
9127 larger is supported. */
9128 /* ??? Should probably check for DI/DFmode overflow here
9129 just like GO_IF_LEGITIMATE_OFFSET does. */
9130 else if (REG_P (XEXP (x
, 0))
9131 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
9132 && GET_MODE_SIZE (mode
) >= 4
9133 && CONST_INT_P (XEXP (x
, 1))
9134 && INTVAL (XEXP (x
, 1)) >= 0
9135 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
9136 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9139 else if (REG_P (XEXP (x
, 0))
9140 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
9141 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
9142 || VIRTUAL_REGISTER_P (XEXP (x
, 0)))
9143 && GET_MODE_SIZE (mode
) >= 4
9144 && CONST_INT_P (XEXP (x
, 1))
9145 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9149 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
9150 && GET_MODE_SIZE (mode
) == 4
9152 && CONSTANT_POOL_ADDRESS_P (x
)
9153 && !arm_disable_literal_pool
9155 && symbol_mentioned_p (get_pool_constant (x
))
9156 && ! pcrel_constant_p (get_pool_constant (x
))))
9162 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9163 instruction of mode MODE. */
9165 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
9167 switch (GET_MODE_SIZE (mode
))
9170 return val
>= 0 && val
< 32;
9173 return val
>= 0 && val
< 64 && (val
& 1) == 0;
9177 && (val
+ GET_MODE_SIZE (mode
)) <= 128
9183 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
, code_helper
)
9186 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
9187 else if (TARGET_THUMB2
)
9188 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
9189 else /* if (TARGET_THUMB1) */
9190 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
9193 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9195 Given an rtx X being reloaded into a reg required to be
9196 in class CLASS, return the class of reg to actually use.
9197 In general this is just CLASS, but for the Thumb core registers and
9198 immediate constants we prefer a LO_REGS class or a subset. */
9201 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
9207 if (rclass
== GENERAL_REGS
)
9214 /* Build the SYMBOL_REF for __tls_get_addr. */
9216 static GTY(()) rtx tls_get_addr_libfunc
;
9219 get_tls_get_addr (void)
9221 if (!tls_get_addr_libfunc
)
9222 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
9223 return tls_get_addr_libfunc
;
9227 arm_load_tp (rtx target
)
9230 target
= gen_reg_rtx (SImode
);
9234 /* Can return in any reg. */
9235 emit_insn (gen_load_tp_hard (target
));
9239 /* Always returned in r0. Immediately copy the result into a pseudo,
9240 otherwise other uses of r0 (e.g. setting up function arguments) may
9241 clobber the value. */
9247 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
9248 rtx initial_fdpic_reg
= get_hard_reg_initial_val (Pmode
, FDPIC_REGNUM
);
9250 emit_insn (gen_load_tp_soft_fdpic ());
9253 emit_insn (gen_restore_pic_register_after_call(fdpic_reg
, initial_fdpic_reg
));
9256 emit_insn (gen_load_tp_soft ());
9258 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
9259 emit_move_insn (target
, tmp
);
9265 load_tls_operand (rtx x
, rtx reg
)
9269 if (reg
== NULL_RTX
)
9270 reg
= gen_reg_rtx (SImode
);
9272 tmp
= gen_rtx_CONST (SImode
, x
);
9274 emit_move_insn (reg
, tmp
);
9280 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
9282 rtx label
, labelno
= NULL_RTX
, sum
;
9284 gcc_assert (reloc
!= TLS_DESCSEQ
);
9289 sum
= gen_rtx_UNSPEC (Pmode
,
9290 gen_rtvec (2, x
, GEN_INT (reloc
)),
9295 labelno
= GEN_INT (pic_labelno
++);
9296 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9297 label
= gen_rtx_CONST (VOIDmode
, label
);
9299 sum
= gen_rtx_UNSPEC (Pmode
,
9300 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
9301 GEN_INT (TARGET_ARM
? 8 : 4)),
9304 reg
= load_tls_operand (sum
, reg
);
9307 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9308 else if (TARGET_ARM
)
9309 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
9311 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9313 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
9314 LCT_PURE
, /* LCT_CONST? */
9317 rtx_insn
*insns
= get_insns ();
9324 arm_tls_descseq_addr (rtx x
, rtx reg
)
9326 rtx labelno
= GEN_INT (pic_labelno
++);
9327 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9328 rtx sum
= gen_rtx_UNSPEC (Pmode
,
9329 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
9330 gen_rtx_CONST (VOIDmode
, label
),
9331 GEN_INT (!TARGET_ARM
)),
9333 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
9335 emit_insn (gen_tlscall (x
, labelno
));
9337 reg
= gen_reg_rtx (SImode
);
9339 gcc_assert (REGNO (reg
) != R0_REGNUM
);
9341 emit_move_insn (reg
, reg0
);
9348 legitimize_tls_address (rtx x
, rtx reg
)
9350 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
9352 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
9356 case TLS_MODEL_GLOBAL_DYNAMIC
:
9357 if (TARGET_GNU2_TLS
)
9359 gcc_assert (!TARGET_FDPIC
);
9361 reg
= arm_tls_descseq_addr (x
, reg
);
9363 tp
= arm_load_tp (NULL_RTX
);
9365 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9369 /* Original scheme */
9371 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32_FDPIC
);
9373 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
9374 dest
= gen_reg_rtx (Pmode
);
9375 emit_libcall_block (insns
, dest
, ret
, x
);
9379 case TLS_MODEL_LOCAL_DYNAMIC
:
9380 if (TARGET_GNU2_TLS
)
9382 gcc_assert (!TARGET_FDPIC
);
9384 reg
= arm_tls_descseq_addr (x
, reg
);
9386 tp
= arm_load_tp (NULL_RTX
);
9388 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9393 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32_FDPIC
);
9395 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
9397 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9398 share the LDM result with other LD model accesses. */
9399 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
9401 dest
= gen_reg_rtx (Pmode
);
9402 emit_libcall_block (insns
, dest
, ret
, eqv
);
9404 /* Load the addend. */
9405 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
9406 GEN_INT (TLS_LDO32
)),
9408 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
9409 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
9413 case TLS_MODEL_INITIAL_EXEC
:
9416 sum
= gen_rtx_UNSPEC (Pmode
,
9417 gen_rtvec (2, x
, GEN_INT (TLS_IE32_FDPIC
)),
9419 reg
= load_tls_operand (sum
, reg
);
9420 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9421 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
9425 labelno
= GEN_INT (pic_labelno
++);
9426 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9427 label
= gen_rtx_CONST (VOIDmode
, label
);
9428 sum
= gen_rtx_UNSPEC (Pmode
,
9429 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
9430 GEN_INT (TARGET_ARM
? 8 : 4)),
9432 reg
= load_tls_operand (sum
, reg
);
9435 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
9436 else if (TARGET_THUMB2
)
9437 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
9440 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9441 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
9445 tp
= arm_load_tp (NULL_RTX
);
9447 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9449 case TLS_MODEL_LOCAL_EXEC
:
9450 tp
= arm_load_tp (NULL_RTX
);
9452 reg
= gen_rtx_UNSPEC (Pmode
,
9453 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
9455 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
9457 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9464 /* Try machine-dependent ways of modifying an illegitimate address
9465 to be legitimate. If we find one, return the new, valid address. */
9467 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9469 if (arm_tls_referenced_p (x
))
9473 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
9475 addend
= XEXP (XEXP (x
, 0), 1);
9476 x
= XEXP (XEXP (x
, 0), 0);
9479 if (!SYMBOL_REF_P (x
))
9482 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
9484 x
= legitimize_tls_address (x
, NULL_RTX
);
9488 x
= gen_rtx_PLUS (SImode
, x
, addend
);
9496 return thumb_legitimize_address (x
, orig_x
, mode
);
9498 if (GET_CODE (x
) == PLUS
)
9500 rtx xop0
= XEXP (x
, 0);
9501 rtx xop1
= XEXP (x
, 1);
9503 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
9504 xop0
= force_reg (SImode
, xop0
);
9506 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
9507 && !symbol_mentioned_p (xop1
))
9508 xop1
= force_reg (SImode
, xop1
);
9510 if (ARM_BASE_REGISTER_RTX_P (xop0
)
9511 && CONST_INT_P (xop1
))
9513 HOST_WIDE_INT n
, low_n
;
9517 /* VFP addressing modes actually allow greater offsets, but for
9518 now we just stick with the lowest common denominator. */
9519 if (mode
== DImode
|| mode
== DFmode
)
9531 low_n
= ((mode
) == TImode
? 0
9532 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
9536 base_reg
= gen_reg_rtx (SImode
);
9537 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
9538 emit_move_insn (base_reg
, val
);
9539 x
= plus_constant (Pmode
, base_reg
, low_n
);
9541 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9542 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9545 /* XXX We don't allow MINUS any more -- see comment in
9546 arm_legitimate_address_outer_p (). */
9547 else if (GET_CODE (x
) == MINUS
)
9549 rtx xop0
= XEXP (x
, 0);
9550 rtx xop1
= XEXP (x
, 1);
9552 if (CONSTANT_P (xop0
))
9553 xop0
= force_reg (SImode
, xop0
);
9555 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
9556 xop1
= force_reg (SImode
, xop1
);
9558 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9559 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
9562 /* Make sure to take full advantage of the pre-indexed addressing mode
9563 with absolute addresses which often allows for the base register to
9564 be factorized for multiple adjacent memory references, and it might
9565 even allows for the mini pool to be avoided entirely. */
9566 else if (CONST_INT_P (x
) && optimize
> 0)
9569 HOST_WIDE_INT mask
, base
, index
;
9572 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9573 only use a 8-bit index. So let's use a 12-bit index for
9574 SImode only and hope that arm_gen_constant will enable LDRB
9575 to use more bits. */
9576 bits
= (mode
== SImode
) ? 12 : 8;
9577 mask
= (1 << bits
) - 1;
9578 base
= INTVAL (x
) & ~mask
;
9579 index
= INTVAL (x
) & mask
;
9580 if (TARGET_ARM
&& bit_count (base
& 0xffffffff) > (32 - bits
)/2)
9582 /* It'll most probably be more efficient to generate the
9583 base with more bits set and use a negative index instead.
9584 Don't do this for Thumb as negative offsets are much more
9589 base_reg
= force_reg (SImode
, GEN_INT (base
));
9590 x
= plus_constant (Pmode
, base_reg
, index
);
9595 /* We need to find and carefully transform any SYMBOL and LABEL
9596 references; so go back to the original address expression. */
9597 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9598 false /*compute_now*/);
9600 if (new_x
!= orig_x
)
9608 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9609 to be legitimate. If we find one, return the new, valid address. */
9611 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9613 if (GET_CODE (x
) == PLUS
9614 && CONST_INT_P (XEXP (x
, 1))
9615 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
9616 || INTVAL (XEXP (x
, 1)) < 0))
9618 rtx xop0
= XEXP (x
, 0);
9619 rtx xop1
= XEXP (x
, 1);
9620 HOST_WIDE_INT offset
= INTVAL (xop1
);
9622 /* Try and fold the offset into a biasing of the base register and
9623 then offsetting that. Don't do this when optimizing for space
9624 since it can cause too many CSEs. */
9625 if (optimize_size
&& offset
>= 0
9626 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
9628 HOST_WIDE_INT delta
;
9631 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
9632 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
9633 delta
= 31 * GET_MODE_SIZE (mode
);
9635 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
9637 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
9639 x
= plus_constant (Pmode
, xop0
, delta
);
9641 else if (offset
< 0 && offset
> -256)
9642 /* Small negative offsets are best done with a subtract before the
9643 dereference, forcing these into a register normally takes two
9645 x
= force_operand (x
, NULL_RTX
);
9648 /* For the remaining cases, force the constant into a register. */
9649 xop1
= force_reg (SImode
, xop1
);
9650 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9653 else if (GET_CODE (x
) == PLUS
9654 && s_register_operand (XEXP (x
, 1), SImode
)
9655 && !s_register_operand (XEXP (x
, 0), SImode
))
9657 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
9659 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
9664 /* We need to find and carefully transform any SYMBOL and LABEL
9665 references; so go back to the original address expression. */
9666 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9667 false /*compute_now*/);
9669 if (new_x
!= orig_x
)
9676 /* Return TRUE if X contains any TLS symbol references. */
9679 arm_tls_referenced_p (rtx x
)
9681 if (! TARGET_HAVE_TLS
)
9684 subrtx_iterator::array_type array
;
9685 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
9687 const_rtx x
= *iter
;
9688 if (SYMBOL_REF_P (x
) && SYMBOL_REF_TLS_MODEL (x
) != 0)
9690 /* ARM currently does not provide relocations to encode TLS variables
9691 into AArch32 instructions, only data, so there is no way to
9692 currently implement these if a literal pool is disabled. */
9693 if (arm_disable_literal_pool
)
9694 sorry ("accessing thread-local storage is not currently supported "
9695 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9700 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9701 TLS offsets, not real symbol references. */
9702 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9703 iter
.skip_subrtxes ();
9708 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9710 On the ARM, allow any integer (invalid ones are removed later by insn
9711 patterns), nice doubles and symbol_refs which refer to the function's
9714 When generating pic allow anything. */
9717 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
9719 if (GET_CODE (x
) == CONST_VECTOR
&& !neon_make_constant (x
, false))
9722 return flag_pic
|| !label_mentioned_p (x
);
9726 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9728 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9729 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9730 for ARMv8-M Baseline or later the result is valid. */
9731 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
9734 return (CONST_INT_P (x
)
9735 || CONST_DOUBLE_P (x
)
9736 || CONSTANT_ADDRESS_P (x
)
9737 || (TARGET_HAVE_MOVT
&& SYMBOL_REF_P (x
))
9738 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9739 we build the symbol address with upper/lower
9742 && !label_mentioned_p (x
)
9743 && arm_valid_symbolic_address_p (x
)
9744 && arm_disable_literal_pool
)
9749 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
9751 return (!arm_cannot_force_const_mem (mode
, x
)
9753 ? arm_legitimate_constant_p_1 (mode
, x
)
9754 : thumb_legitimate_constant_p (mode
, x
)));
9757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9760 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9763 split_const (x
, &base
, &offset
);
9765 if (SYMBOL_REF_P (base
))
9767 /* Function symbols cannot have an offset due to the Thumb bit. */
9768 if ((SYMBOL_REF_FLAGS (base
) & SYMBOL_FLAG_FUNCTION
)
9769 && INTVAL (offset
) != 0)
9772 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9773 && !offset_within_block_p (base
, INTVAL (offset
)))
9776 return arm_tls_referenced_p (x
);
9779 #define REG_OR_SUBREG_REG(X) \
9781 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9783 #define REG_OR_SUBREG_RTX(X) \
9784 (REG_P (X) ? (X) : SUBREG_REG (X))
9787 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9789 machine_mode mode
= GET_MODE (x
);
9798 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9805 return COSTS_N_INSNS (1);
9808 if (arm_arch6m
&& arm_m_profile_small_mul
)
9809 return COSTS_N_INSNS (32);
9811 if (CONST_INT_P (XEXP (x
, 1)))
9814 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
9821 return COSTS_N_INSNS (2) + cycles
;
9823 return COSTS_N_INSNS (1) + 16;
9826 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9828 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9829 return (COSTS_N_INSNS (words
)
9830 + 4 * ((MEM_P (SET_SRC (x
)))
9831 + MEM_P (SET_DEST (x
))));
9836 if (UINTVAL (x
) < 256
9837 /* 16-bit constant. */
9838 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
9840 if (thumb_shiftable_const (INTVAL (x
)))
9841 return COSTS_N_INSNS (2);
9842 return arm_disable_literal_pool
9844 : COSTS_N_INSNS (3);
9846 else if ((outer
== PLUS
|| outer
== COMPARE
)
9847 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9849 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9850 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9851 return COSTS_N_INSNS (1);
9852 else if (outer
== AND
)
9855 /* This duplicates the tests in the andsi3 expander. */
9856 for (i
= 9; i
<= 31; i
++)
9857 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9858 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9859 return COSTS_N_INSNS (2);
9861 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9862 || outer
== LSHIFTRT
)
9864 return COSTS_N_INSNS (2);
9870 return COSTS_N_INSNS (3);
9888 /* XXX another guess. */
9889 /* Memory costs quite a lot for the first word, but subsequent words
9890 load at the equivalent of a single insn each. */
9891 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9892 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
9897 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9903 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9904 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9910 return total
+ COSTS_N_INSNS (1);
9912 /* Assume a two-shift sequence. Increase the cost slightly so
9913 we prefer actual shifts over an extend operation. */
9914 return total
+ 1 + COSTS_N_INSNS (2);
9921 /* Estimates the size cost of thumb1 instructions.
9922 For now most of the code is copied from thumb1_rtx_costs. We need more
9923 fine grain tuning when we have more related test cases. */
9925 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9927 machine_mode mode
= GET_MODE (x
);
9936 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9940 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9941 defined by RTL expansion, especially for the expansion of
9943 if ((GET_CODE (XEXP (x
, 0)) == MULT
9944 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9945 || (GET_CODE (XEXP (x
, 1)) == MULT
9946 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9947 return COSTS_N_INSNS (2);
9952 return COSTS_N_INSNS (1);
9955 if (CONST_INT_P (XEXP (x
, 1)))
9957 /* Thumb1 mul instruction can't operate on const. We must Load it
9958 into a register first. */
9959 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9960 /* For the targets which have a very small and high-latency multiply
9961 unit, we prefer to synthesize the mult with up to 5 instructions,
9962 giving a good balance between size and performance. */
9963 if (arm_arch6m
&& arm_m_profile_small_mul
)
9964 return COSTS_N_INSNS (5);
9966 return COSTS_N_INSNS (1) + const_size
;
9968 return COSTS_N_INSNS (1);
9971 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9973 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9974 cost
= COSTS_N_INSNS (words
);
9975 if (satisfies_constraint_J (SET_SRC (x
))
9976 || satisfies_constraint_K (SET_SRC (x
))
9977 /* Too big an immediate for a 2-byte mov, using MOVT. */
9978 || (CONST_INT_P (SET_SRC (x
))
9979 && UINTVAL (SET_SRC (x
)) >= 256
9981 && satisfies_constraint_j (SET_SRC (x
)))
9982 /* thumb1_movdi_insn. */
9983 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9984 cost
+= COSTS_N_INSNS (1);
9990 if (UINTVAL (x
) < 256)
9991 return COSTS_N_INSNS (1);
9992 /* movw is 4byte long. */
9993 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9994 return COSTS_N_INSNS (2);
9995 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9996 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9997 return COSTS_N_INSNS (2);
9998 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9999 if (thumb_shiftable_const (INTVAL (x
)))
10000 return COSTS_N_INSNS (2);
10001 return arm_disable_literal_pool
10002 ? COSTS_N_INSNS (8)
10003 : COSTS_N_INSNS (3);
10005 else if ((outer
== PLUS
|| outer
== COMPARE
)
10006 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
10008 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
10009 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
10010 return COSTS_N_INSNS (1);
10011 else if (outer
== AND
)
10014 /* This duplicates the tests in the andsi3 expander. */
10015 for (i
= 9; i
<= 31; i
++)
10016 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
10017 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
10018 return COSTS_N_INSNS (2);
10020 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
10021 || outer
== LSHIFTRT
)
10023 return COSTS_N_INSNS (2);
10029 return COSTS_N_INSNS (3);
10043 return COSTS_N_INSNS (1);
10046 return (COSTS_N_INSNS (1)
10047 + COSTS_N_INSNS (1)
10048 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
10049 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
10050 ? COSTS_N_INSNS (1) : 0));
10054 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10059 /* XXX still guessing. */
10060 switch (GET_MODE (XEXP (x
, 0)))
10063 return (1 + (mode
== DImode
? 4 : 0)
10064 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10067 return (4 + (mode
== DImode
? 4 : 0)
10068 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10071 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
10082 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10083 PLUS, adds the carry flag, then return the other operand. If
10084 neither is a carry, return OP unchanged. */
10086 strip_carry_operation (rtx op
)
10088 gcc_assert (GET_CODE (op
) == PLUS
);
10089 if (arm_carry_operation (XEXP (op
, 0), GET_MODE (op
)))
10090 return XEXP (op
, 1);
10091 else if (arm_carry_operation (XEXP (op
, 1), GET_MODE (op
)))
10092 return XEXP (op
, 0);
10096 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10097 operand, then return the operand that is being shifted. If the shift
10098 is not by a constant, then set SHIFT_REG to point to the operand.
10099 Return NULL if OP is not a shifter operand. */
10101 shifter_op_p (rtx op
, rtx
*shift_reg
)
10103 enum rtx_code code
= GET_CODE (op
);
10105 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
10106 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
10107 return XEXP (op
, 0);
10108 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
10109 return XEXP (op
, 0);
10110 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
10111 || code
== ASHIFTRT
)
10113 if (!CONST_INT_P (XEXP (op
, 1)))
10114 *shift_reg
= XEXP (op
, 1);
10115 return XEXP (op
, 0);
/* Compute the cost of an UNSPEC or UNSPEC_VOLATILE rtx X into *COST.
   SPEED_P selects performance (true) versus size costing; the outer
   code parameter is deliberately unused.  */
10122 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
/* Extra per-insn cost table for the currently selected tuning.  */
10124 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
10125 rtx_code code
= GET_CODE (x
);
/* Only UNSPEC-like rtxes may be passed here.  */
10126 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
/* Dispatch on the specific unspec number.  */
10128 switch (XINT (x
, 1))
10130 case UNSPEC_UNALIGNED_LOAD
:
10131 /* We can only do unaligned loads into the integer unit, and we can't
10132 use LDM or LDRD. */
/* One instruction per word loaded.  */
10133 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* Add the per-word load cost plus the unaligned-access penalty from the
   tuning table.  */
10135 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
10136 + extra_cost
->ldst
.load_unaligned
);
/* Also cost the address computation for the accessed location.  */
10139 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10140 ADDR_SPACE_GENERIC
, speed_p
);
/* Unaligned stores mirror the load case: one insn per word plus store
   and unaligned penalties.  */
10144 case UNSPEC_UNALIGNED_STORE
:
10145 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
10147 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
10148 + extra_cost
->ldst
.store_unaligned
);
/* A store must additionally cost the value being stored.  */
10150 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
10152 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10153 ADDR_SPACE_GENERIC
, speed_p
);
/* FP round-to-integral variants (VRINT*): add the FP rounding cost for
   the operand width (index selects DFmode vs. SFmode table entry).  */
10157 case UNSPEC_VRINTZ
:
10158 case UNSPEC_VRINTP
:
10159 case UNSPEC_VRINTM
:
10160 case UNSPEC_VRINTR
:
10161 case UNSPEC_VRINTX
:
10162 case UNSPEC_VRINTA
:
10164 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
/* Any other unspec: assume a flat two-instruction cost.  */
10168 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   N is the number of arguments; it is parenthesized in the expansion so
   that a non-trivial argument expression cannot change the grouping.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
/* Costing helper for narrow (sub-word) arithmetic OP: if operand IDX of
   the binary rtx X is a valid shifter operand (see shifter_op_p) that is
   a left shift, cost it as a combined arith+shift operation -- using the
   register-shift cost when shifter_op_p recorded a register amount in
   SHIFT_REG -- plus the cost of the shifted operand and of the other
   operand of X.
   NOTE(review): the macro's closing lines are not visible in this
   extract; confirm its tail against the full source before editing.  */
10178 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10181 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10182 if (shift_op != NULL \
10183 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10188 *cost += extra_cost->alu.arith_shift_reg; \
10189 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10190 ASHIFT, 1, speed_p); \
10192 else if (speed_p) \
10193 *cost += extra_cost->alu.arith_shift; \
10195 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10196 ASHIFT, 0, speed_p) \
10197 + rtx_cost (XEXP (x, 1 - IDX), \
10198 GET_MODE (shift_op), \
10199 OP, 1, speed_p)); \
10205 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10206 considering the costs of the addressing mode and memory access
/* X is the MEM rtx; the computed cost is stored in *COST.  SPEED_P
   selects performance versus size costing.  */
10209 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
10210 int *cost
, bool speed_p
)
10212 machine_mode mode
= GET_MODE (x
);
/* Base cost: every memory access is at least one instruction.  */
10214 *cost
= COSTS_N_INSNS (1);
/* NOTE(review): the opening conjunct of this guard is not visible in
   this extract; per the arm.md reference below it presumably tests
   flag_pic -- confirm against the full source.  */
10217 && GET_CODE (XEXP (x
, 0)) == PLUS
10218 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
10219 /* This will be split into two instructions. Add the cost of the
10220 additional instruction here. The cost of the memory access is computed
10221 below. See arm.md:calculate_pic_address. */
10222 *cost
+= COSTS_N_INSNS (1);
10224 /* Calculate cost of the addressing mode. */
10227 arm_addr_mode_op op_type
;
/* Classify the address form; AMO_DEFAULT for plain register addressing,
   AMO_NO_WB for base-plus-offset forms.  */
10228 switch (GET_CODE (XEXP (x
, 0)))
10232 op_type
= AMO_DEFAULT
;
10235 /* MINUS does not appear in RTL, but the architecture supports it,
10236 so handle this case defensively. */
10239 op_type
= AMO_NO_WB
;
/* Charge the addressing-mode cost from the table matching the mode
   class of the access.  */
10251 if (VECTOR_MODE_P (mode
))
10252 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
10253 else if (FLOAT_MODE_P (mode
))
10254 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
10256 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
10259 /* Calculate cost of memory access. */
/* Float accesses: doubleword (size 8) vs. singleword load cost.  */
10262 if (FLOAT_MODE_P (mode
))
10264 if (GET_MODE_SIZE (mode
) == 8)
10265 *cost
+= extra_cost
->ldst
.loadd
;
10267 *cost
+= extra_cost
->ldst
.loadf
;
10269 else if (VECTOR_MODE_P (mode
))
10270 *cost
+= extra_cost
->ldst
.loadv
;
10273 /* Integer modes */
/* Doubleword integer access costs as LDRD, otherwise a plain load.  */
10274 if (GET_MODE_SIZE (mode
) == 8)
10275 *cost
+= extra_cost
->ldst
.ldrd
;
10277 *cost
+= extra_cost
->ldst
.load
;
10284 /* Helper for arm_bfi_p. */
10286 arm_bfi_1_p (rtx op0
, rtx op1
, rtx
*sub0
, rtx
*sub1
)
10288 unsigned HOST_WIDE_INT const1
;
10289 unsigned HOST_WIDE_INT const2
= 0;
10291 if (!CONST_INT_P (XEXP (op0
, 1)))
10294 const1
= UINTVAL (XEXP (op0
, 1));
10295 if (!CONST_INT_P (XEXP (op1
, 1))
10296 || ~UINTVAL (XEXP (op1
, 1)) != const1
)
10299 if (GET_CODE (XEXP (op0
, 0)) == ASHIFT
10300 && CONST_INT_P (XEXP (XEXP (op0
, 0), 1)))
10302 const2
= UINTVAL (XEXP (XEXP (op0
, 0), 1));
10303 *sub0
= XEXP (XEXP (op0
, 0), 0);
10306 *sub0
= XEXP (op0
, 0);
10308 if (const2
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
10311 *sub1
= XEXP (op1
, 0);
10312 return exact_log2 (const1
+ (HOST_WIDE_INT_1U
<< const2
)) >= 0;
10315 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10316 format looks something like:
10318 (IOR (AND (reg1) (~const1))
10319 (AND (ASHIFT (reg2) (const2))
10322 where const1 is a consecutive sequence of 1-bits with the
10323 least-significant non-zero bit starting at bit position const2. If
10324 const2 is zero, then the shift will not appear at all, due to
10325 canonicalization. The two arms of the IOR expression may be
10328 arm_bfi_p (rtx x
, rtx
*sub0
, rtx
*sub1
)
10330 if (GET_CODE (x
) != IOR
)
10332 if (GET_CODE (XEXP (x
, 0)) != AND
10333 || GET_CODE (XEXP (x
, 1)) != AND
)
10335 return (arm_bfi_1_p (XEXP (x
, 0), XEXP (x
, 1), sub0
, sub1
)
10336 || arm_bfi_1_p (XEXP (x
, 1), XEXP (x
, 0), sub1
, sub0
));
10339 /* RTX costs. Make an estimate of the cost of executing the operation
10340 X, which is contained within an operation with code OUTER_CODE.
10341 SPEED_P indicates whether the cost desired is the performance cost,
10342 or the size cost. The estimate is stored in COST and the return
10343 value is TRUE if the cost calculation is final, or FALSE if the
10344 caller should recurse through the operands of X to add additional
10347 We currently make no attempt to model the size savings of Thumb-2
10348 16-bit instructions. At the normal points in compilation where
10349 this code is called we have no measure of whether the condition
10350 flags are live or not, and thus no realistic way to determine what
10351 the size will eventually be. */
10353 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10354 const struct cpu_cost_table
*extra_cost
,
10355 int *cost
, bool speed_p
)
10357 machine_mode mode
= GET_MODE (x
);
10359 *cost
= COSTS_N_INSNS (1);
10364 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
10366 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
10374 /* SET RTXs don't have a mode so we get it from the destination. */
10375 mode
= GET_MODE (SET_DEST (x
));
10377 if (REG_P (SET_SRC (x
))
10378 && REG_P (SET_DEST (x
)))
10380 /* Assume that most copies can be done with a single insn,
10381 unless we don't have HW FP, in which case everything
10382 larger than word mode will require two insns. */
10383 *cost
= COSTS_N_INSNS (((!TARGET_VFP_BASE
10384 && GET_MODE_SIZE (mode
) > 4)
10387 /* Conditional register moves can be encoded
10388 in 16 bits in Thumb mode. */
10389 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
10395 if (CONST_INT_P (SET_SRC (x
)))
10397 /* Handle CONST_INT here, since the value doesn't have a mode
10398 and we would otherwise be unable to work out the true cost. */
10399 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
10402 /* Slightly lower the cost of setting a core reg to a constant.
10403 This helps break up chains and allows for better scheduling. */
10404 if (REG_P (SET_DEST (x
))
10405 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
10408 /* Immediate moves with an immediate in the range [0, 255] can be
10409 encoded in 16 bits in Thumb mode. */
10410 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
10411 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
10413 goto const_int_cost
;
10419 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
10423 /* Calculations of LDM costs are complex. We assume an initial cost
10424 (ldm_1st) which will load the number of registers mentioned in
10425 ldm_regs_per_insn_1st registers; then each additional
10426 ldm_regs_per_insn_subsequent registers cost one more insn. The
10427 formula for N regs is thus:
10429 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10430 + ldm_regs_per_insn_subsequent - 1)
10431 / ldm_regs_per_insn_subsequent).
10433 Additional costs may also be added for addressing. A similar
10434 formula is used for STM. */
10436 bool is_ldm
= load_multiple_operation (x
, SImode
);
10437 bool is_stm
= store_multiple_operation (x
, SImode
);
10439 if (is_ldm
|| is_stm
)
10443 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
10444 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
10445 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
10446 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
10447 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
10448 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
10449 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
10451 *cost
+= regs_per_insn_1st
10452 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
10453 + regs_per_insn_sub
- 1)
10454 / regs_per_insn_sub
);
10463 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10464 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10465 *cost
+= COSTS_N_INSNS (speed_p
10466 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
10467 else if (mode
== SImode
&& TARGET_IDIV
)
10468 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
10470 *cost
= LIBCALL_COST (2);
10472 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10473 possible udiv is prefered. */
10474 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
10475 return false; /* All arguments must be in registers. */
10478 /* MOD by a power of 2 can be expanded as:
10480 and r0, r0, #(n - 1)
10481 and r1, r1, #(n - 1)
10482 rsbpl r0, r1, #0. */
10483 if (CONST_INT_P (XEXP (x
, 1))
10484 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
10487 *cost
+= COSTS_N_INSNS (3);
10490 *cost
+= 2 * extra_cost
->alu
.logical
10491 + extra_cost
->alu
.arith
;
10495 /* Fall-through. */
10497 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10498 possible udiv is prefered. */
10499 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
10500 return false; /* All arguments must be in registers. */
10503 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
10505 *cost
+= (COSTS_N_INSNS (1)
10506 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10508 *cost
+= extra_cost
->alu
.shift_reg
;
10516 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
10518 *cost
+= (COSTS_N_INSNS (2)
10519 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10521 *cost
+= 2 * extra_cost
->alu
.shift
;
10522 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
10523 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
10527 else if (mode
== SImode
)
10529 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10530 /* Slightly disparage register shifts at -Os, but not by much. */
10531 if (!CONST_INT_P (XEXP (x
, 1)))
10532 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10533 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10536 else if (GET_MODE_CLASS (mode
) == MODE_INT
10537 && GET_MODE_SIZE (mode
) < 4)
10539 if (code
== ASHIFT
)
10541 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10542 /* Slightly disparage register shifts at -Os, but not by
10544 if (!CONST_INT_P (XEXP (x
, 1)))
10545 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10546 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10548 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
10550 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
10552 /* Can use SBFX/UBFX. */
10554 *cost
+= extra_cost
->alu
.bfx
;
10555 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10559 *cost
+= COSTS_N_INSNS (1);
10560 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10563 if (CONST_INT_P (XEXP (x
, 1)))
10564 *cost
+= 2 * extra_cost
->alu
.shift
;
10566 *cost
+= (extra_cost
->alu
.shift
10567 + extra_cost
->alu
.shift_reg
);
10570 /* Slightly disparage register shifts. */
10571 *cost
+= !CONST_INT_P (XEXP (x
, 1));
10574 else /* Rotates. */
10576 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
10577 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10580 if (CONST_INT_P (XEXP (x
, 1)))
10581 *cost
+= (2 * extra_cost
->alu
.shift
10582 + extra_cost
->alu
.log_shift
);
10584 *cost
+= (extra_cost
->alu
.shift
10585 + extra_cost
->alu
.shift_reg
10586 + extra_cost
->alu
.log_shift_reg
);
10592 *cost
= LIBCALL_COST (2);
10598 if (mode
== SImode
)
10601 *cost
+= extra_cost
->alu
.rev
;
10608 /* No rev instruction available. Look at arm_legacy_rev
10609 and thumb_legacy_rev for the form of RTL used then. */
10612 *cost
+= COSTS_N_INSNS (9);
10616 *cost
+= 6 * extra_cost
->alu
.shift
;
10617 *cost
+= 3 * extra_cost
->alu
.logical
;
10622 *cost
+= COSTS_N_INSNS (4);
10626 *cost
+= 2 * extra_cost
->alu
.shift
;
10627 *cost
+= extra_cost
->alu
.arith_shift
;
10628 *cost
+= 2 * extra_cost
->alu
.logical
;
10636 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10637 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10639 if (GET_CODE (XEXP (x
, 0)) == MULT
10640 || GET_CODE (XEXP (x
, 1)) == MULT
)
10642 rtx mul_op0
, mul_op1
, sub_op
;
10645 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10647 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10649 mul_op0
= XEXP (XEXP (x
, 0), 0);
10650 mul_op1
= XEXP (XEXP (x
, 0), 1);
10651 sub_op
= XEXP (x
, 1);
10655 mul_op0
= XEXP (XEXP (x
, 1), 0);
10656 mul_op1
= XEXP (XEXP (x
, 1), 1);
10657 sub_op
= XEXP (x
, 0);
10660 /* The first operand of the multiply may be optionally
10662 if (GET_CODE (mul_op0
) == NEG
)
10663 mul_op0
= XEXP (mul_op0
, 0);
10665 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10666 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10667 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10673 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10677 if (mode
== SImode
)
10679 rtx shift_by_reg
= NULL
;
10682 rtx op0
= XEXP (x
, 0);
10683 rtx op1
= XEXP (x
, 1);
10685 /* Factor out any borrow operation. There's more than one way
10686 of expressing this; try to recognize them all. */
10687 if (GET_CODE (op0
) == MINUS
)
10689 if (arm_borrow_operation (op1
, SImode
))
10691 op1
= XEXP (op0
, 1);
10692 op0
= XEXP (op0
, 0);
10694 else if (arm_borrow_operation (XEXP (op0
, 1), SImode
))
10695 op0
= XEXP (op0
, 0);
10697 else if (GET_CODE (op1
) == PLUS
10698 && arm_borrow_operation (XEXP (op1
, 0), SImode
))
10699 op1
= XEXP (op1
, 0);
10700 else if (GET_CODE (op0
) == NEG
10701 && arm_borrow_operation (op1
, SImode
))
10703 /* Negate with carry-in. For Thumb2 this is done with
10704 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10705 RSC instruction that exists in Arm mode. */
10707 *cost
+= (TARGET_THUMB2
10708 ? extra_cost
->alu
.arith_shift
10709 : extra_cost
->alu
.arith
);
10710 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed_p
);
10713 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10714 Note we do mean ~borrow here. */
10715 else if (TARGET_ARM
&& arm_carry_operation (op0
, SImode
))
10717 *cost
+= rtx_cost (op1
, mode
, code
, 1, speed_p
);
10721 shift_op
= shifter_op_p (op0
, &shift_by_reg
);
10722 if (shift_op
== NULL
)
10724 shift_op
= shifter_op_p (op1
, &shift_by_reg
);
10725 non_shift_op
= op0
;
10728 non_shift_op
= op1
;
10730 if (shift_op
!= NULL
)
10732 if (shift_by_reg
!= NULL
)
10735 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10736 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10739 *cost
+= extra_cost
->alu
.arith_shift
;
10741 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10742 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10746 if (arm_arch_thumb2
10747 && GET_CODE (XEXP (x
, 1)) == MULT
)
10751 *cost
+= extra_cost
->mult
[0].add
;
10752 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10753 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10754 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10758 if (CONST_INT_P (op0
))
10760 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10761 INTVAL (op0
), NULL_RTX
,
10763 *cost
= COSTS_N_INSNS (insns
);
10765 *cost
+= insns
* extra_cost
->alu
.arith
;
10766 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10770 *cost
+= extra_cost
->alu
.arith
;
10772 /* Don't recurse as we don't want to cost any borrow that
10774 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed_p
);
10775 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10779 if (GET_MODE_CLASS (mode
) == MODE_INT
10780 && GET_MODE_SIZE (mode
) < 4)
10782 rtx shift_op
, shift_reg
;
10785 /* We check both sides of the MINUS for shifter operands since,
10786 unlike PLUS, it's not commutative. */
10788 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
10789 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
10791 /* Slightly disparage, as we might need to widen the result. */
10794 *cost
+= extra_cost
->alu
.arith
;
10796 if (CONST_INT_P (XEXP (x
, 0)))
10798 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10805 if (mode
== DImode
)
10807 *cost
+= COSTS_N_INSNS (1);
10809 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10811 rtx op1
= XEXP (x
, 1);
10814 *cost
+= 2 * extra_cost
->alu
.arith
;
10816 if (GET_CODE (op1
) == ZERO_EXTEND
)
10817 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10820 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10821 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10825 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10828 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10829 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10831 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10834 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10835 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10838 *cost
+= (extra_cost
->alu
.arith
10839 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10840 ? extra_cost
->alu
.arith
10841 : extra_cost
->alu
.arith_shift
));
10842 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10843 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10844 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10849 *cost
+= 2 * extra_cost
->alu
.arith
;
10855 *cost
= LIBCALL_COST (2);
10859 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10860 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10862 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10864 rtx mul_op0
, mul_op1
, add_op
;
10867 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10869 mul_op0
= XEXP (XEXP (x
, 0), 0);
10870 mul_op1
= XEXP (XEXP (x
, 0), 1);
10871 add_op
= XEXP (x
, 1);
10873 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10874 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10875 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10881 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10884 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10886 *cost
= LIBCALL_COST (2);
10890 /* Narrow modes can be synthesized in SImode, but the range
10891 of useful sub-operations is limited. Check for shift operations
10892 on one of the operands. Only left shifts can be used in the
10894 if (GET_MODE_CLASS (mode
) == MODE_INT
10895 && GET_MODE_SIZE (mode
) < 4)
10897 rtx shift_op
, shift_reg
;
10900 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
10902 if (CONST_INT_P (XEXP (x
, 1)))
10904 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10905 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10907 *cost
= COSTS_N_INSNS (insns
);
10909 *cost
+= insns
* extra_cost
->alu
.arith
;
10910 /* Slightly penalize a narrow operation as the result may
10912 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10916 /* Slightly penalize a narrow operation as the result may
10920 *cost
+= extra_cost
->alu
.arith
;
10925 if (mode
== SImode
)
10927 rtx shift_op
, shift_reg
;
10929 if (TARGET_INT_SIMD
10930 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10931 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10933 /* UXTA[BH] or SXTA[BH]. */
10935 *cost
+= extra_cost
->alu
.extend_arith
;
10936 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10938 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10942 rtx op0
= XEXP (x
, 0);
10943 rtx op1
= XEXP (x
, 1);
10945 /* Handle a side effect of adding in the carry to an addition. */
10946 if (GET_CODE (op0
) == PLUS
10947 && arm_carry_operation (op1
, mode
))
10949 op1
= XEXP (op0
, 1);
10950 op0
= XEXP (op0
, 0);
10952 else if (GET_CODE (op1
) == PLUS
10953 && arm_carry_operation (op0
, mode
))
10955 op0
= XEXP (op1
, 0);
10956 op1
= XEXP (op1
, 1);
10958 else if (GET_CODE (op0
) == PLUS
)
10960 op0
= strip_carry_operation (op0
);
10961 if (swap_commutative_operands_p (op0
, op1
))
10962 std::swap (op0
, op1
);
10965 if (arm_carry_operation (op0
, mode
))
10967 /* Adding the carry to a register is a canonicalization of
10968 adding 0 to the register plus the carry. */
10970 *cost
+= extra_cost
->alu
.arith
;
10971 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10976 shift_op
= shifter_op_p (op0
, &shift_reg
);
10977 if (shift_op
!= NULL
)
10982 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10983 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10986 *cost
+= extra_cost
->alu
.arith_shift
;
10988 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10989 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10993 if (GET_CODE (op0
) == MULT
)
10997 if (TARGET_DSP_MULTIPLY
10998 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10999 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11000 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11001 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11002 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
11003 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
11004 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
11005 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
11006 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
11007 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
11008 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
11009 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
11012 /* SMLA[BT][BT]. */
11014 *cost
+= extra_cost
->mult
[0].extend_add
;
11015 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
11016 SIGN_EXTEND
, 0, speed_p
)
11017 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
11018 SIGN_EXTEND
, 0, speed_p
)
11019 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11024 *cost
+= extra_cost
->mult
[0].add
;
11025 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
11026 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
11027 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
11031 if (CONST_INT_P (op1
))
11033 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
11034 INTVAL (op1
), NULL_RTX
,
11036 *cost
= COSTS_N_INSNS (insns
);
11038 *cost
+= insns
* extra_cost
->alu
.arith
;
11039 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11044 *cost
+= extra_cost
->alu
.arith
;
11046 /* Don't recurse here because we want to test the operands
11047 without any carry operation. */
11048 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
11049 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
11053 if (mode
== DImode
)
11055 if (GET_CODE (XEXP (x
, 0)) == MULT
11056 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
11057 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
11058 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
11059 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
11062 *cost
+= extra_cost
->mult
[1].extend_add
;
11063 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11064 ZERO_EXTEND
, 0, speed_p
)
11065 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
11066 ZERO_EXTEND
, 0, speed_p
)
11067 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11071 *cost
+= COSTS_N_INSNS (1);
11073 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11074 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
11077 *cost
+= (extra_cost
->alu
.arith
11078 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11079 ? extra_cost
->alu
.arith
11080 : extra_cost
->alu
.arith_shift
));
11082 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
11084 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11089 *cost
+= 2 * extra_cost
->alu
.arith
;
11094 *cost
= LIBCALL_COST (2);
11099 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
11102 *cost
+= extra_cost
->alu
.rev
;
11106 else if (mode
== SImode
&& arm_arch_thumb2
11107 && arm_bfi_p (x
, &sub0
, &sub1
))
11109 *cost
+= rtx_cost (sub0
, mode
, ZERO_EXTRACT
, 1, speed_p
);
11110 *cost
+= rtx_cost (sub1
, mode
, ZERO_EXTRACT
, 0, speed_p
);
11112 *cost
+= extra_cost
->alu
.bfi
;
11118 /* Fall through. */
11119 case AND
: case XOR
:
11120 if (mode
== SImode
)
11122 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
11123 rtx op0
= XEXP (x
, 0);
11124 rtx shift_op
, shift_reg
;
11128 || (code
== IOR
&& TARGET_THUMB2
)))
11129 op0
= XEXP (op0
, 0);
11132 shift_op
= shifter_op_p (op0
, &shift_reg
);
11133 if (shift_op
!= NULL
)
11138 *cost
+= extra_cost
->alu
.log_shift_reg
;
11139 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11142 *cost
+= extra_cost
->alu
.log_shift
;
11144 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
11145 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11149 if (CONST_INT_P (XEXP (x
, 1)))
11151 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
11152 INTVAL (XEXP (x
, 1)), NULL_RTX
,
11155 *cost
= COSTS_N_INSNS (insns
);
11157 *cost
+= insns
* extra_cost
->alu
.logical
;
11158 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
11163 *cost
+= extra_cost
->alu
.logical
;
11164 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
11165 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11169 if (mode
== DImode
)
11171 rtx op0
= XEXP (x
, 0);
11172 enum rtx_code subcode
= GET_CODE (op0
);
11174 *cost
+= COSTS_N_INSNS (1);
11178 || (code
== IOR
&& TARGET_THUMB2
)))
11179 op0
= XEXP (op0
, 0);
11181 if (GET_CODE (op0
) == ZERO_EXTEND
)
11184 *cost
+= 2 * extra_cost
->alu
.logical
;
11186 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
11188 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11191 else if (GET_CODE (op0
) == SIGN_EXTEND
)
11194 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
11196 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
11198 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11203 *cost
+= 2 * extra_cost
->alu
.logical
;
11209 *cost
= LIBCALL_COST (2);
11213 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11214 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11216 rtx op0
= XEXP (x
, 0);
11218 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
11219 op0
= XEXP (op0
, 0);
11222 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
11224 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
11225 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
11228 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11230 *cost
= LIBCALL_COST (2);
11234 if (mode
== SImode
)
11236 if (TARGET_DSP_MULTIPLY
11237 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11238 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11239 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11240 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11241 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
11242 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11243 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11244 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
11245 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11246 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11247 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11248 && (INTVAL (XEXP (XEXP (x
, 1), 1))
11251 /* SMUL[TB][TB]. */
11253 *cost
+= extra_cost
->mult
[0].extend
;
11254 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
11255 SIGN_EXTEND
, 0, speed_p
);
11256 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
11257 SIGN_EXTEND
, 1, speed_p
);
11261 *cost
+= extra_cost
->mult
[0].simple
;
11265 if (mode
== DImode
)
11267 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11268 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
11269 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11270 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
11273 *cost
+= extra_cost
->mult
[1].extend
;
11274 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
11275 ZERO_EXTEND
, 0, speed_p
)
11276 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
11277 ZERO_EXTEND
, 0, speed_p
));
11281 *cost
= LIBCALL_COST (2);
11286 *cost
= LIBCALL_COST (2);
11290 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11291 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11293 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11296 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
11301 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11305 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11307 *cost
= LIBCALL_COST (1);
11311 if (mode
== SImode
)
11313 if (GET_CODE (XEXP (x
, 0)) == ABS
)
11315 *cost
+= COSTS_N_INSNS (1);
11316 /* Assume the non-flag-changing variant. */
11318 *cost
+= (extra_cost
->alu
.log_shift
11319 + extra_cost
->alu
.arith_shift
);
11320 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
11324 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
11325 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
11327 *cost
+= COSTS_N_INSNS (1);
11328 /* No extra cost for MOV imm and MVN imm. */
11329 /* If the comparison op is using the flags, there's no further
11330 cost, otherwise we need to add the cost of the comparison. */
11331 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
11332 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
11333 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
11335 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
11336 *cost
+= (COSTS_N_INSNS (1)
11337 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
11339 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
11342 *cost
+= extra_cost
->alu
.arith
;
11348 *cost
+= extra_cost
->alu
.arith
;
11352 if (GET_MODE_CLASS (mode
) == MODE_INT
11353 && GET_MODE_SIZE (mode
) < 4)
11355 /* Slightly disparage, as we might need an extend operation. */
11358 *cost
+= extra_cost
->alu
.arith
;
11362 if (mode
== DImode
)
11364 *cost
+= COSTS_N_INSNS (1);
11366 *cost
+= 2 * extra_cost
->alu
.arith
;
11371 *cost
= LIBCALL_COST (1);
11375 if (mode
== SImode
)
11378 rtx shift_reg
= NULL
;
11380 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11384 if (shift_reg
!= NULL
)
11387 *cost
+= extra_cost
->alu
.log_shift_reg
;
11388 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11391 *cost
+= extra_cost
->alu
.log_shift
;
11392 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
11397 *cost
+= extra_cost
->alu
.logical
;
11400 if (mode
== DImode
)
11402 *cost
+= COSTS_N_INSNS (1);
11408 *cost
+= LIBCALL_COST (1);
11413 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
11415 *cost
+= COSTS_N_INSNS (3);
11418 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
11419 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
11421 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
11422 /* Assume that if one arm of the if_then_else is a register,
11423 that it will be tied with the result and eliminate the
11424 conditional insn. */
11425 if (REG_P (XEXP (x
, 1)))
11427 else if (REG_P (XEXP (x
, 2)))
11433 if (extra_cost
->alu
.non_exec_costs_exec
)
11434 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
11436 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
11439 *cost
+= op1cost
+ op2cost
;
11445 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
11449 machine_mode op0mode
;
11450 /* We'll mostly assume that the cost of a compare is the cost of the
11451 LHS. However, there are some notable exceptions. */
11453 /* Floating point compares are never done as side-effects. */
11454 op0mode
= GET_MODE (XEXP (x
, 0));
11455 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
11456 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11459 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
11461 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
11463 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
11469 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
11471 *cost
= LIBCALL_COST (2);
11475 /* DImode compares normally take two insns. */
11476 if (op0mode
== DImode
)
11478 *cost
+= COSTS_N_INSNS (1);
11480 *cost
+= 2 * extra_cost
->alu
.arith
;
11484 if (op0mode
== SImode
)
11489 if (XEXP (x
, 1) == const0_rtx
11490 && !(REG_P (XEXP (x
, 0))
11491 || (GET_CODE (XEXP (x
, 0)) == SUBREG
11492 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
11494 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11496 /* Multiply operations that set the flags are often
11497 significantly more expensive. */
11499 && GET_CODE (XEXP (x
, 0)) == MULT
11500 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
11501 *cost
+= extra_cost
->mult
[0].flag_setting
;
11504 && GET_CODE (XEXP (x
, 0)) == PLUS
11505 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11506 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
11508 *cost
+= extra_cost
->mult
[0].flag_setting
;
11513 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11514 if (shift_op
!= NULL
)
11516 if (shift_reg
!= NULL
)
11518 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
11521 *cost
+= extra_cost
->alu
.arith_shift_reg
;
11524 *cost
+= extra_cost
->alu
.arith_shift
;
11525 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
11526 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
11531 *cost
+= extra_cost
->alu
.arith
;
11532 if (CONST_INT_P (XEXP (x
, 1))
11533 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11535 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11543 *cost
= LIBCALL_COST (2);
11553 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11556 && TARGET_HARD_FLOAT
11557 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11558 && (XEXP (x
, 1) == CONST0_RTX (mode
)))
11564 /* Fall through. */
11578 if (outer_code
== SET
)
11580 /* Is it a store-flag operation? */
11581 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11582 && XEXP (x
, 1) == const0_rtx
)
11584 /* Thumb also needs an IT insn. */
11585 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
11588 if (XEXP (x
, 1) == const0_rtx
)
11593 /* LSR Rd, Rn, #31. */
11595 *cost
+= extra_cost
->alu
.shift
;
11605 *cost
+= COSTS_N_INSNS (1);
11609 /* RSBS T1, Rn, Rn, LSR #31
11611 *cost
+= COSTS_N_INSNS (1);
11613 *cost
+= extra_cost
->alu
.arith_shift
;
11617 /* RSB Rd, Rn, Rn, ASR #1
11618 LSR Rd, Rd, #31. */
11619 *cost
+= COSTS_N_INSNS (1);
11621 *cost
+= (extra_cost
->alu
.arith_shift
11622 + extra_cost
->alu
.shift
);
11628 *cost
+= COSTS_N_INSNS (1);
11630 *cost
+= extra_cost
->alu
.shift
;
11634 /* Remaining cases are either meaningless or would take
11635 three insns anyway. */
11636 *cost
= COSTS_N_INSNS (3);
11639 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11644 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
11645 if (CONST_INT_P (XEXP (x
, 1))
11646 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11648 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11655 /* Not directly inside a set. If it involves the condition code
11656 register it must be the condition for a branch, cond_exec or
11657 I_T_E operation. Since the comparison is performed elsewhere
11658 this is just the control part which has no additional
11660 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11661 && XEXP (x
, 1) == const0_rtx
)
11669 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11670 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11673 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11677 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11679 *cost
= LIBCALL_COST (1);
11683 if (mode
== SImode
)
11686 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
11690 *cost
= LIBCALL_COST (1);
11694 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
11695 && MEM_P (XEXP (x
, 0)))
11697 if (mode
== DImode
)
11698 *cost
+= COSTS_N_INSNS (1);
11703 if (GET_MODE (XEXP (x
, 0)) == SImode
)
11704 *cost
+= extra_cost
->ldst
.load
;
11706 *cost
+= extra_cost
->ldst
.load_sign_extend
;
11708 if (mode
== DImode
)
11709 *cost
+= extra_cost
->alu
.shift
;
11714 /* Widening from less than 32-bits requires an extend operation. */
11715 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11717 /* We have SXTB/SXTH. */
11718 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11720 *cost
+= extra_cost
->alu
.extend
;
11722 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11724 /* Needs two shifts. */
11725 *cost
+= COSTS_N_INSNS (1);
11726 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11728 *cost
+= 2 * extra_cost
->alu
.shift
;
11731 /* Widening beyond 32-bits requires one more insn. */
11732 if (mode
== DImode
)
11734 *cost
+= COSTS_N_INSNS (1);
11736 *cost
+= extra_cost
->alu
.shift
;
11743 || GET_MODE (XEXP (x
, 0)) == SImode
11744 || GET_MODE (XEXP (x
, 0)) == QImode
)
11745 && MEM_P (XEXP (x
, 0)))
11747 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11749 if (mode
== DImode
)
11750 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11755 /* Widening from less than 32-bits requires an extend operation. */
11756 if (GET_MODE (XEXP (x
, 0)) == QImode
)
11758 /* UXTB can be a shorter instruction in Thumb2, but it might
11759 be slower than the AND Rd, Rn, #255 alternative. When
11760 optimizing for speed it should never be slower to use
11761 AND, and we don't really model 16-bit vs 32-bit insns
11764 *cost
+= extra_cost
->alu
.logical
;
11766 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11768 /* We have UXTB/UXTH. */
11769 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11771 *cost
+= extra_cost
->alu
.extend
;
11773 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11775 /* Needs two shifts. It's marginally preferable to use
11776 shifts rather than two BIC instructions as the second
11777 shift may merge with a subsequent insn as a shifter
11779 *cost
= COSTS_N_INSNS (2);
11780 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11782 *cost
+= 2 * extra_cost
->alu
.shift
;
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode
== DImode
)
11788 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11795 /* CONST_INT has no mode, so we cannot tell for sure how many
11796 insns are really going to be needed. The best we can do is
11797 look at the value passed. If it fits in SImode, then assume
11798 that's the mode it will be used for. Otherwise assume it
11799 will be used in DImode. */
11800 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
11805 /* Avoid blowing up in arm_gen_constant (). */
11806 if (!(outer_code
== PLUS
11807 || outer_code
== AND
11808 || outer_code
== IOR
11809 || outer_code
== XOR
11810 || outer_code
== MINUS
))
11814 if (mode
== SImode
)
11816 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
11817 INTVAL (x
), NULL
, NULL
,
11823 *cost
+= COSTS_N_INSNS (arm_gen_constant
11824 (outer_code
, SImode
, NULL
,
11825 trunc_int_for_mode (INTVAL (x
), SImode
),
11827 + arm_gen_constant (outer_code
, SImode
, NULL
,
11828 INTVAL (x
) >> 32, NULL
,
11840 if (arm_arch_thumb2
&& !flag_pic
)
11841 *cost
+= COSTS_N_INSNS (1);
11843 *cost
+= extra_cost
->ldst
.load
;
11846 *cost
+= COSTS_N_INSNS (1);
11850 *cost
+= COSTS_N_INSNS (1);
11852 *cost
+= extra_cost
->alu
.arith
;
11858 *cost
= COSTS_N_INSNS (4);
11863 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11864 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11866 if (vfp3_const_double_rtx (x
))
11869 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11875 if (mode
== DFmode
)
11876 *cost
+= extra_cost
->ldst
.loadd
;
11878 *cost
+= extra_cost
->ldst
.loadf
;
11881 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11885 *cost
= COSTS_N_INSNS (4);
11890 if (((TARGET_NEON
&& TARGET_HARD_FLOAT
11891 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
11892 || TARGET_HAVE_MVE
)
11893 && simd_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11894 *cost
= COSTS_N_INSNS (1);
11896 *cost
= COSTS_N_INSNS (4);
11901 /* When optimizing for size, we prefer constant pool entries to
11902 MOVW/MOVT pairs, so bump the cost of these slightly. */
11909 *cost
+= extra_cost
->alu
.clz
;
11913 if (XEXP (x
, 1) == const0_rtx
)
11916 *cost
+= extra_cost
->alu
.log_shift
;
11917 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11920 /* Fall through. */
11924 *cost
+= COSTS_N_INSNS (1);
11928 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11929 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11930 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11931 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11932 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11933 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11934 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11935 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11939 *cost
+= extra_cost
->mult
[1].extend
;
11940 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11941 ZERO_EXTEND
, 0, speed_p
)
11942 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11943 ZERO_EXTEND
, 0, speed_p
));
11946 *cost
= LIBCALL_COST (1);
11949 case UNSPEC_VOLATILE
:
11951 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11954 /* Reading the PC is like reading any other register. Writing it
11955 is more expensive, but we take that into account elsewhere. */
11960 /* TODO: Simple zero_extract of bottom bits using AND. */
11961 /* Fall through. */
11965 && CONST_INT_P (XEXP (x
, 1))
11966 && CONST_INT_P (XEXP (x
, 2)))
11969 *cost
+= extra_cost
->alu
.bfx
;
11970 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11973 /* Without UBFX/SBFX, need to resort to shift operations. */
11974 *cost
+= COSTS_N_INSNS (1);
11976 *cost
+= 2 * extra_cost
->alu
.shift
;
11977 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11981 if (TARGET_HARD_FLOAT
)
11984 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11986 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11988 /* Pre v8, widening HF->DF is a two-step process, first
11989 widening to SFmode. */
11990 *cost
+= COSTS_N_INSNS (1);
11992 *cost
+= extra_cost
->fp
[0].widen
;
11994 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11998 *cost
= LIBCALL_COST (1);
12001 case FLOAT_TRUNCATE
:
12002 if (TARGET_HARD_FLOAT
)
12005 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
12006 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
12008 /* Vector modes? */
12010 *cost
= LIBCALL_COST (1);
12014 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
12016 rtx op0
= XEXP (x
, 0);
12017 rtx op1
= XEXP (x
, 1);
12018 rtx op2
= XEXP (x
, 2);
12021 /* vfms or vfnma. */
12022 if (GET_CODE (op0
) == NEG
)
12023 op0
= XEXP (op0
, 0);
12025 /* vfnms or vfnma. */
12026 if (GET_CODE (op2
) == NEG
)
12027 op2
= XEXP (op2
, 0);
12029 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
12030 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
12031 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
12034 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
12039 *cost
= LIBCALL_COST (3);
12044 if (TARGET_HARD_FLOAT
)
12046 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12047 a vcvt fixed-point conversion. */
12048 if (code
== FIX
&& mode
== SImode
12049 && GET_CODE (XEXP (x
, 0)) == FIX
12050 && GET_MODE (XEXP (x
, 0)) == SFmode
12051 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12052 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
12056 *cost
+= extra_cost
->fp
[0].toint
;
12058 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
12063 if (GET_MODE_CLASS (mode
) == MODE_INT
)
12065 mode
= GET_MODE (XEXP (x
, 0));
12067 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
12068 /* Strip of the 'cost' of rounding towards zero. */
12069 if (GET_CODE (XEXP (x
, 0)) == FIX
)
12070 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
12073 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
12074 /* ??? Increase the cost to deal with transferring from
12075 FP -> CORE registers? */
12078 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
12082 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
12085 /* Vector costs? */
12087 *cost
= LIBCALL_COST (1);
12091 case UNSIGNED_FLOAT
:
12092 if (TARGET_HARD_FLOAT
)
12094 /* ??? Increase the cost to deal with transferring from CORE
12095 -> FP registers? */
12097 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
12100 *cost
= LIBCALL_COST (1);
12108 /* Just a guess. Guess number of instructions in the asm
12109 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12110 though (see PR60663). */
12111 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
12112 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
12114 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
12118 if (mode
!= VOIDmode
)
12119 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
12121 *cost
= COSTS_N_INSNS (4); /* Who knows? */
12126 #undef HANDLE_NARROW_SHIFT_ARITH
12128 /* RTX costs entry point. */
12131 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
12132 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
12135 int code
= GET_CODE (x
);
12136 gcc_assert (current_tune
->insn_extra_cost
);
12138 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
12139 (enum rtx_code
) outer_code
,
12140 current_tune
->insn_extra_cost
,
12143 if (dump_file
&& arm_verbose_cost
)
12145 print_rtl_single (dump_file
, x
);
12146 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
12147 *total
, result
? "final" : "partial");
12153 arm_insn_cost (rtx_insn
*insn
, bool speed
)
12157 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12158 will likely disappear during register allocation. */
12159 if (!reload_completed
12160 && GET_CODE (PATTERN (insn
)) == SET
12161 && REG_P (SET_DEST (PATTERN (insn
)))
12162 && REG_P (SET_SRC (PATTERN (insn
))))
12164 cost
= pattern_cost (PATTERN (insn
), speed
);
12165 /* If the cost is zero, then it's likely a complex insn. We don't want the
12166 cost of these to be less than something we know about. */
12167 return cost
? cost
: COSTS_N_INSNS (2);
12170 /* All address computations that can be done are free, but rtx cost returns
12171 the same for practically all of them. So we weight the different types
12172 of address here in the order (most pref first):
12173 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12175 arm_arm_address_cost (rtx x
)
12177 enum rtx_code c
= GET_CODE (x
);
12179 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
12181 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
12186 if (CONST_INT_P (XEXP (x
, 1)))
12189 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
12199 arm_thumb_address_cost (rtx x
)
12201 enum rtx_code c
= GET_CODE (x
);
12206 && REG_P (XEXP (x
, 0))
12207 && CONST_INT_P (XEXP (x
, 1)))
12214 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
12215 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
12217 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
12220 /* Adjust cost hook for XScale. */
12222 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12225 /* Some true dependencies can have a higher cost depending
12226 on precisely how certain input operands are used. */
12228 && recog_memoized (insn
) >= 0
12229 && recog_memoized (dep
) >= 0)
12231 int shift_opnum
= get_attr_shift (insn
);
12232 enum attr_type attr_type
= get_attr_type (dep
);
12234 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12235 operand for INSN. If we have a shifted input operand and the
12236 instruction we depend on is another ALU instruction, then we may
12237 have to account for an additional stall. */
12238 if (shift_opnum
!= 0
12239 && (attr_type
== TYPE_ALU_SHIFT_IMM_LSL_1TO4
12240 || attr_type
== TYPE_ALU_SHIFT_IMM_OTHER
12241 || attr_type
== TYPE_ALUS_SHIFT_IMM
12242 || attr_type
== TYPE_LOGIC_SHIFT_IMM
12243 || attr_type
== TYPE_LOGICS_SHIFT_IMM
12244 || attr_type
== TYPE_ALU_SHIFT_REG
12245 || attr_type
== TYPE_ALUS_SHIFT_REG
12246 || attr_type
== TYPE_LOGIC_SHIFT_REG
12247 || attr_type
== TYPE_LOGICS_SHIFT_REG
12248 || attr_type
== TYPE_MOV_SHIFT
12249 || attr_type
== TYPE_MVN_SHIFT
12250 || attr_type
== TYPE_MOV_SHIFT_REG
12251 || attr_type
== TYPE_MVN_SHIFT_REG
))
12253 rtx shifted_operand
;
12256 /* Get the shifted operand. */
12257 extract_insn (insn
);
12258 shifted_operand
= recog_data
.operand
[shift_opnum
];
12260 /* Iterate over all the operands in DEP. If we write an operand
12261 that overlaps with SHIFTED_OPERAND, then we have increase the
12262 cost of this dependency. */
12263 extract_insn (dep
);
12264 preprocess_constraints (dep
);
12265 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12267 /* We can ignore strict inputs. */
12268 if (recog_data
.operand_type
[opno
] == OP_IN
)
12271 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
12283 /* Adjust cost hook for Cortex A9. */
12285 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12295 case REG_DEP_OUTPUT
:
12296 if (recog_memoized (insn
) >= 0
12297 && recog_memoized (dep
) >= 0)
12299 if (GET_CODE (PATTERN (insn
)) == SET
)
12302 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
12304 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
12306 enum attr_type attr_type_insn
= get_attr_type (insn
);
12307 enum attr_type attr_type_dep
= get_attr_type (dep
);
12309 /* By default all dependencies of the form
12312 have an extra latency of 1 cycle because
12313 of the input and output dependency in this
12314 case. However this gets modeled as an true
12315 dependency and hence all these checks. */
12316 if (REG_P (SET_DEST (PATTERN (insn
)))
12317 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
12319 /* FMACS is a special case where the dependent
12320 instruction can be issued 3 cycles before
12321 the normal latency in case of an output
12323 if ((attr_type_insn
== TYPE_FMACS
12324 || attr_type_insn
== TYPE_FMACD
)
12325 && (attr_type_dep
== TYPE_FMACS
12326 || attr_type_dep
== TYPE_FMACD
))
12328 if (dep_type
== REG_DEP_OUTPUT
)
12329 *cost
= insn_default_latency (dep
) - 3;
12331 *cost
= insn_default_latency (dep
);
12336 if (dep_type
== REG_DEP_OUTPUT
)
12337 *cost
= insn_default_latency (dep
) + 1;
12339 *cost
= insn_default_latency (dep
);
12349 gcc_unreachable ();
12355 /* Adjust cost hook for FA726TE. */
12357 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12360 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
12361 have penalty of 3. */
12362 if (dep_type
== REG_DEP_TRUE
12363 && recog_memoized (insn
) >= 0
12364 && recog_memoized (dep
) >= 0
12365 && get_attr_conds (dep
) == CONDS_SET
)
12367 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12368 if (get_attr_conds (insn
) == CONDS_USE
12369 && get_attr_type (insn
) != TYPE_BRANCH
)
12375 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
12376 || get_attr_conds (insn
) == CONDS_USE
)
12386 /* Implement TARGET_REGISTER_MOVE_COST.
12388 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12389 it is typically more expensive than a single memory access. We set
12390 the cost to less than two memory accesses so that floating
12391 point to integer conversion does not go through memory. */
12394 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
12395 reg_class_t from
, reg_class_t to
)
12399 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
12400 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
12402 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
12403 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
12405 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
12412 if (from
== HI_REGS
|| to
== HI_REGS
)
12419 /* Implement TARGET_MEMORY_MOVE_COST. */
12422 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
12423 bool in ATTRIBUTE_UNUSED
)
12429 if (GET_MODE_SIZE (mode
) < 4)
12432 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
12436 /* Vectorizer cost model implementation. */
12438 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12440 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
12442 int misalign ATTRIBUTE_UNUSED
)
12446 switch (type_of_cost
)
12449 return current_tune
->vec_costs
->scalar_stmt_cost
;
12452 return current_tune
->vec_costs
->scalar_load_cost
;
12455 return current_tune
->vec_costs
->scalar_store_cost
;
12458 return current_tune
->vec_costs
->vec_stmt_cost
;
12461 return current_tune
->vec_costs
->vec_align_load_cost
;
12464 return current_tune
->vec_costs
->vec_store_cost
;
12466 case vec_to_scalar
:
12467 return current_tune
->vec_costs
->vec_to_scalar_cost
;
12469 case scalar_to_vec
:
12470 return current_tune
->vec_costs
->scalar_to_vec_cost
;
12472 case unaligned_load
:
12473 case vector_gather_load
:
12474 return current_tune
->vec_costs
->vec_unalign_load_cost
;
12476 case unaligned_store
:
12477 case vector_scatter_store
:
12478 return current_tune
->vec_costs
->vec_unalign_store_cost
;
12480 case cond_branch_taken
:
12481 return current_tune
->vec_costs
->cond_taken_branch_cost
;
12483 case cond_branch_not_taken
:
12484 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
12487 case vec_promote_demote
:
12488 return current_tune
->vec_costs
->vec_stmt_cost
;
12490 case vec_construct
:
12491 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
12492 return elements
/ 2 + 1;
12495 gcc_unreachable ();
12499 /* Return true if and only if this insn can dual-issue only as older. */
12501 cortexa7_older_only (rtx_insn
*insn
)
12503 if (recog_memoized (insn
) < 0)
12506 switch (get_attr_type (insn
))
12508 case TYPE_ALU_DSP_REG
:
12509 case TYPE_ALU_SREG
:
12510 case TYPE_ALUS_SREG
:
12511 case TYPE_LOGIC_REG
:
12512 case TYPE_LOGICS_REG
:
12514 case TYPE_ADCS_REG
:
12519 case TYPE_SHIFT_IMM
:
12520 case TYPE_SHIFT_REG
:
12521 case TYPE_LOAD_BYTE
:
12524 case TYPE_FFARITHS
:
12526 case TYPE_FFARITHD
:
12544 case TYPE_F_STORES
:
12551 /* Return true if and only if this insn can dual-issue as younger. */
12553 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
12555 if (recog_memoized (insn
) < 0)
12558 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
12562 switch (get_attr_type (insn
))
12565 case TYPE_ALUS_IMM
:
12566 case TYPE_LOGIC_IMM
:
12567 case TYPE_LOGICS_IMM
:
12572 case TYPE_MOV_SHIFT
:
12573 case TYPE_MOV_SHIFT_REG
:
12583 /* Look for an instruction that can dual issue only as an older
12584 instruction, and move it in front of any instructions that can
12585 dual-issue as younger, while preserving the relative order of all
12586 other instructions in the ready list. This is a hueuristic to help
12587 dual-issue in later cycles, by postponing issue of more flexible
12588 instructions. This heuristic may affect dual issue opportunities
12589 in the current cycle. */
12591 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
12592 int *n_readyp
, int clock
)
12595 int first_older_only
= -1, first_younger
= -1;
12599 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12603 /* Traverse the ready list from the head (the instruction to issue
12604 first), and looking for the first instruction that can issue as
12605 younger and the first instruction that can dual-issue only as
12607 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12609 rtx_insn
*insn
= ready
[i
];
12610 if (cortexa7_older_only (insn
))
12612 first_older_only
= i
;
12614 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12617 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12621 /* Nothing to reorder because either no younger insn found or insn
12622 that can dual-issue only as older appears before any insn that
12623 can dual-issue as younger. */
12624 if (first_younger
== -1)
12627 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12631 /* Nothing to reorder because no older-only insn in the ready list. */
12632 if (first_older_only
== -1)
12635 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12639 /* Move first_older_only insn before first_younger. */
12641 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12642 INSN_UID(ready
[first_older_only
]),
12643 INSN_UID(ready
[first_younger
]));
12644 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12645 for (i
= first_older_only
; i
< first_younger
; i
++)
12647 ready
[i
] = ready
[i
+1];
12650 ready
[i
] = first_older_only_insn
;
12654 /* Implement TARGET_SCHED_REORDER. */
12656 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12661 case TARGET_CPU_cortexa7
:
12662 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12665 /* Do nothing for other cores. */
12669 return arm_issue_rate ();
12672 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12673 It corrects the value of COST based on the relationship between
12674 INSN and DEP through the dependence LINK. It returns the new
12675 value. There is a per-core adjust_cost hook to adjust scheduler costs
12676 and the per-core hook can choose to completely override the generic
12677 adjust_cost function. Only put bits of code into arm_adjust_cost that
12678 are common across all cores. */
12680 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
12685 /* When generating Thumb-1 code, we want to place flag-setting operations
12686 close to a conditional branch which depends on them, so that we can
12687 omit the comparison. */
12690 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12691 && recog_memoized (dep
) >= 0
12692 && get_attr_conds (dep
) == CONDS_SET
)
12695 if (current_tune
->sched_adjust_cost
!= NULL
)
12697 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
12701 /* XXX Is this strictly true? */
12702 if (dep_type
== REG_DEP_ANTI
12703 || dep_type
== REG_DEP_OUTPUT
)
12706 /* Call insns don't incur a stall, even if they follow a load. */
12711 if ((i_pat
= single_set (insn
)) != NULL
12712 && MEM_P (SET_SRC (i_pat
))
12713 && (d_pat
= single_set (dep
)) != NULL
12714 && MEM_P (SET_DEST (d_pat
)))
12716 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12717 /* This is a load after a store, there is no conflict if the load reads
12718 from a cached area. Assume that loads from the stack, and from the
12719 constant pool are cached, and that others will miss. This is a
12722 if ((SYMBOL_REF_P (src_mem
)
12723 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12724 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12725 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12726 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12734 arm_max_conditional_execute (void)
12736 return max_insns_skipped
;
12740 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12743 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12745 return (optimize
> 0) ? 2 : 0;
12749 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12751 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12754 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12755 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12756 sequences of non-executed instructions in IT blocks probably take the same
12757 amount of time as executed instructions (and the IT instruction itself takes
12758 space in icache). This function was experimentally determined to give good
12759 results on a popular embedded benchmark. */
12762 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12764 return (TARGET_32BIT
&& speed_p
) ? 1
12765 : arm_default_branch_cost (speed_p
, predictable_p
);
12769 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12771 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12774 static bool fp_consts_inited
= false;
12776 static REAL_VALUE_TYPE value_fp0
;
12779 init_fp_table (void)
12783 r
= REAL_VALUE_ATOF ("0", DFmode
);
12785 fp_consts_inited
= true;
12788 /* Return TRUE if rtx X is a valid immediate FP constant. */
12790 arm_const_double_rtx (rtx x
)
12792 const REAL_VALUE_TYPE
*r
;
12794 if (!fp_consts_inited
)
12797 r
= CONST_DOUBLE_REAL_VALUE (x
);
12798 if (REAL_VALUE_MINUS_ZERO (*r
))
12801 if (real_equal (r
, &value_fp0
))
12807 /* VFPv3 has a fairly wide range of representable immediates, formed from
12808 "quarter-precision" floating-point values. These can be evaluated using this
12809 formula (with ^ for exponentiation):
12813 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12814 16 <= n <= 31 and 0 <= r <= 7.
12816 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12818 - A (most-significant) is the sign bit.
12819 - BCD are the exponent (encoded as r XOR 3).
12820 - EFGH are the mantissa (encoded as n - 16).
12823 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12824 fconst[sd] instruction, or -1 if X isn't suitable. */
12826 vfp3_const_double_index (rtx x
)
12828 REAL_VALUE_TYPE r
, m
;
12829 int sign
, exponent
;
12830 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12831 unsigned HOST_WIDE_INT mask
;
12832 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12835 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12838 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12840 /* We can't represent these things, so detect them first. */
12841 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12844 /* Extract sign, exponent and mantissa. */
12845 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12846 r
= real_value_abs (&r
);
12847 exponent
= REAL_EXP (&r
);
12848 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12849 highest (sign) bit, with a fixed binary point at bit point_pos.
12850 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12851 bits for the mantissa, this may fail (low bits would be lost). */
12852 real_ldexp (&m
, &r
, point_pos
- exponent
);
12853 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12854 mantissa
= w
.elt (0);
12855 mant_hi
= w
.elt (1);
12857 /* If there are bits set in the low part of the mantissa, we can't
12858 represent this value. */
12862 /* Now make it so that mantissa contains the most-significant bits, and move
12863 the point_pos to indicate that the least-significant bits have been
12865 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12866 mantissa
= mant_hi
;
12868 /* We can permit four significant bits of mantissa only, plus a high bit
12869 which is always 1. */
12870 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12871 if ((mantissa
& mask
) != 0)
12874 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12875 mantissa
>>= point_pos
- 5;
12877 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12878 floating-point immediate zero with Neon using an integer-zero load, but
12879 that case is handled elsewhere.) */
12883 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12885 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12886 normalized significands are in the range [1, 2). (Our mantissa is shifted
12887 left 4 places at this point relative to normalized IEEE754 values). GCC
12888 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12889 REAL_EXP must be altered. */
12890 exponent
= 5 - exponent
;
12892 if (exponent
< 0 || exponent
> 7)
12895 /* Sign, mantissa and exponent are now in the correct form to plug into the
12896 formula described in the comment above. */
12897 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12900 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12902 vfp3_const_double_rtx (rtx x
)
12907 return vfp3_const_double_index (x
) != -1;
12910 /* Recognize immediates which can be used in various Neon and MVE instructions.
12911 Legal immediates are described by the following table (for VMVN variants, the
12912 bitwise inverse of the constant shown is recognized. In either case, VMOV
12913 is output and the correct instruction to use for a given constant is chosen
12914 by the assembler). The constant shown is replicated across all elements of
12915 the destination vector.
12917 insn elems variant constant (binary)
12918 ---- ----- ------- -----------------
12919 vmov i32 0 00000000 00000000 00000000 abcdefgh
12920 vmov i32 1 00000000 00000000 abcdefgh 00000000
12921 vmov i32 2 00000000 abcdefgh 00000000 00000000
12922 vmov i32 3 abcdefgh 00000000 00000000 00000000
12923 vmov i16 4 00000000 abcdefgh
12924 vmov i16 5 abcdefgh 00000000
12925 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12926 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12927 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12928 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12929 vmvn i16 10 00000000 abcdefgh
12930 vmvn i16 11 abcdefgh 00000000
12931 vmov i32 12 00000000 00000000 abcdefgh 11111111
12932 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12933 vmov i32 14 00000000 abcdefgh 11111111 11111111
12934 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12935 vmov i8 16 abcdefgh
12936 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12937 eeeeeeee ffffffff gggggggg hhhhhhhh
12938 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12939 vmov f32 19 00000000 00000000 00000000 00000000
12941 For case 18, B = !b. Representable values are exactly those accepted by
12942 vfp3_const_double_index, but are output as floating-point numbers rather
12945 For case 19, we will change it to vmov.i32 when assembling.
12947 Variants 0-5 (inclusive) may also be used as immediates for the second
12948 operand of VORR/VBIC instructions.
12950 The INVERSE argument causes the bitwise inverse of the given operand to be
12951 recognized instead (used for recognizing legal immediates for the VAND/VORN
12952 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12953 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12954 output, rather than the real insns vbic/vorr).
12956 INVERSE makes no difference to the recognition of float vectors.
12958 The return value is the variant of immediate as shown in the above table, or
12959 -1 if the given value doesn't match any of the listed patterns.
12962 simd_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12963 rtx
*modconst
, int *elementwidth
)
12965 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12967 for (i = 0; i < idx; i += (STRIDE)) \
12972 immtype = (CLASS); \
12973 elsize = (ELSIZE); \
12977 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12978 unsigned int innersize
;
12979 unsigned char bytes
[16] = {};
12980 int immtype
= -1, matches
;
12981 unsigned int invmask
= inverse
? 0xff : 0;
12982 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12985 n_elts
= CONST_VECTOR_NUNITS (op
);
12989 gcc_assert (mode
!= VOIDmode
);
12992 innersize
= GET_MODE_UNIT_SIZE (mode
);
12994 /* Only support 128-bit vectors for MVE. */
12995 if (TARGET_HAVE_MVE
12997 || VALID_MVE_PRED_MODE (mode
)
12998 || n_elts
* innersize
!= 16))
13001 if (!TARGET_HAVE_MVE
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
13004 /* Vectors of float constants. */
13005 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
13007 rtx el0
= CONST_VECTOR_ELT (op
, 0);
13009 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
13012 /* FP16 vectors cannot be represented. */
13013 if (GET_MODE_INNER (mode
) == HFmode
)
13016 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13017 are distinct in this context. */
13018 if (!const_vec_duplicate_p (op
))
13022 *modconst
= CONST_VECTOR_ELT (op
, 0);
13027 if (el0
== CONST0_RTX (GET_MODE (el0
)))
13033 /* The tricks done in the code below apply for little-endian vector layout.
13034 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13035 FIXME: Implement logic for big-endian vectors. */
13036 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
13039 /* Splat vector constant out into a byte vector. */
13040 for (i
= 0; i
< n_elts
; i
++)
13042 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
13043 unsigned HOST_WIDE_INT elpart
;
13045 gcc_assert (CONST_INT_P (el
));
13046 elpart
= INTVAL (el
);
13048 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
13050 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
13051 elpart
>>= BITS_PER_UNIT
;
13055 /* Sanity check. */
13056 gcc_assert (idx
== GET_MODE_SIZE (mode
));
13060 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
13061 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13063 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13064 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13066 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13067 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13069 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13070 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
13072 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
13074 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
13076 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
13077 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13079 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13080 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13082 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13083 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13085 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13086 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
13088 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
13090 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
13092 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13093 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13095 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13096 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13098 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13099 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13101 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13102 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13104 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
13106 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
13107 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
13115 *elementwidth
= elsize
;
13119 unsigned HOST_WIDE_INT imm
= 0;
13121 /* Un-invert bytes of recognized vector, if necessary. */
13123 for (i
= 0; i
< idx
; i
++)
13124 bytes
[i
] ^= invmask
;
13128 /* FIXME: Broken on 32-bit H_W_I hosts. */
13129 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
13131 for (i
= 0; i
< 8; i
++)
13132 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
13133 << (i
* BITS_PER_UNIT
);
13135 *modconst
= GEN_INT (imm
);
13139 unsigned HOST_WIDE_INT imm
= 0;
13141 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
13142 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
13144 *modconst
= GEN_INT (imm
);
13152 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13153 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13154 (or zero for float elements), and a modified constant (whatever should be
13155 output for a VMOV) in *MODCONST. "neon_immediate_valid_for_move" function is
13156 modified to "simd_immediate_valid_for_move" as this function will be used
13157 both by neon and mve. */
13159 simd_immediate_valid_for_move (rtx op
, machine_mode mode
,
13160 rtx
*modconst
, int *elementwidth
)
13164 int retval
= simd_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
13170 *modconst
= tmpconst
;
13173 *elementwidth
= tmpwidth
;
13178 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13179 the immediate is valid, write a constant suitable for using as an operand
13180 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13181 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13184 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
13185 rtx
*modconst
, int *elementwidth
)
13189 int retval
= simd_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
13191 if (retval
< 0 || retval
> 5)
13195 *modconst
= tmpconst
;
13198 *elementwidth
= tmpwidth
;
13203 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13204 the immediate is valid, write a constant suitable for using as an operand
13205 to VSHR/VSHL to *MODCONST and the corresponding element width to
13206 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
13207 because they have different limitations. */
13210 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
13211 rtx
*modconst
, int *elementwidth
,
13214 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
13215 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
13216 unsigned HOST_WIDE_INT last_elt
= 0;
13217 unsigned HOST_WIDE_INT maxshift
;
13219 /* Split vector constant out into a byte vector. */
13220 for (i
= 0; i
< n_elts
; i
++)
13222 rtx el
= CONST_VECTOR_ELT (op
, i
);
13223 unsigned HOST_WIDE_INT elpart
;
13225 if (CONST_INT_P (el
))
13226 elpart
= INTVAL (el
);
13227 else if (CONST_DOUBLE_P (el
))
13230 gcc_unreachable ();
13232 if (i
!= 0 && elpart
!= last_elt
)
13238 /* Shift less than element size. */
13239 maxshift
= innersize
* 8;
13243 /* Left shift immediate value can be from 0 to <size>-1. */
13244 if (last_elt
>= maxshift
)
13249 /* Right shift immediate value can be from 1 to <size>. */
13250 if (last_elt
== 0 || last_elt
> maxshift
)
13255 *elementwidth
= innersize
* 8;
13258 *modconst
= CONST_VECTOR_ELT (op
, 0);
13263 /* Return a string suitable for output of Neon immediate logic operation
13267 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
13268 int inverse
, int quad
)
13270 int width
, is_valid
;
13271 static char templ
[40];
13273 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
13275 gcc_assert (is_valid
!= 0);
13278 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
13280 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
13285 /* Return a string suitable for output of Neon immediate shift operation
13286 (VSHR or VSHL) MNEM. */
13289 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
13290 machine_mode mode
, int quad
,
13293 int width
, is_valid
;
13294 static char templ
[40];
13296 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
13297 gcc_assert (is_valid
!= 0);
13300 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
13302 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
13307 /* Output a sequence of pairwise operations to implement a reduction.
13308 NOTE: We do "too much work" here, because pairwise operations work on two
13309 registers-worth of operands in one go. Unfortunately we can't exploit those
13310 extra calculations to do the full operation in fewer steps, I don't think.
13311 Although all vector elements of the result but the first are ignored, we
13312 actually calculate the same result in each of the elements. An alternative
13313 such as initially loading a vector with zero to use as each of the second
13314 operands would use up an additional register and take an extra instruction,
13315 for no particular gain. */
13318 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
13319 rtx (*reduc
) (rtx
, rtx
, rtx
))
13321 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
13324 for (i
= parts
/ 2; i
>= 1; i
/= 2)
13326 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
13327 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
13332 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13333 loaded into a register using VDUP.
13335 If this is the case, and GENERATE is set, we also generate
13336 instructions to do this and return an RTX to assign to the register. */
13339 neon_vdup_constant (rtx vals
, bool generate
)
13341 machine_mode mode
= GET_MODE (vals
);
13342 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13345 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
13348 if (!const_vec_duplicate_p (vals
, &x
))
13349 /* The elements are not all the same. We could handle repeating
13350 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13351 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13358 /* We can load this constant by using VDUP and a constant in a
13359 single ARM register. This will be cheaper than a vector
13362 x
= copy_to_mode_reg (inner_mode
, x
);
13363 return gen_vec_duplicate (mode
, x
);
13366 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13368 mve_bool_vec_to_const (rtx const_vec
)
13370 machine_mode mode
= GET_MODE (const_vec
);
13372 if (!VECTOR_MODE_P (mode
))
13375 unsigned n_elts
= GET_MODE_NUNITS (mode
);
13376 unsigned el_prec
= GET_MODE_PRECISION (GET_MODE_INNER (mode
));
13377 unsigned shift_c
= 16 / n_elts
;
13381 for (i
= 0; i
< n_elts
; i
++)
13383 rtx el
= CONST_VECTOR_ELT (const_vec
, i
);
13384 unsigned HOST_WIDE_INT elpart
;
13386 gcc_assert (CONST_INT_P (el
));
13387 elpart
= INTVAL (el
) & ((1U << el_prec
) - 1);
13389 unsigned index
= BYTES_BIG_ENDIAN
? n_elts
- i
- 1 : i
;
13391 hi_val
|= elpart
<< (index
* shift_c
);
13393 /* We are using mov immediate to encode this constant which writes 32-bits
13394 so we need to make sure the top 16-bits are all 0, otherwise we can't
13395 guarantee we can actually write this immediate. */
13396 return gen_int_mode (hi_val
, SImode
);
13399 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13400 constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
13403 If this is the case, and GENERATE is set, we also generate code to do
13404 this and return an RTX to copy into the register. */
13407 neon_make_constant (rtx vals
, bool generate
)
13409 machine_mode mode
= GET_MODE (vals
);
13411 rtx const_vec
= NULL_RTX
;
13412 int n_elts
= GET_MODE_NUNITS (mode
);
13416 if (GET_CODE (vals
) == CONST_VECTOR
)
13418 else if (GET_CODE (vals
) == PARALLEL
)
13420 /* A CONST_VECTOR must contain only CONST_INTs and
13421 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13422 Only store valid constants in a CONST_VECTOR. */
13423 for (i
= 0; i
< n_elts
; ++i
)
13425 rtx x
= XVECEXP (vals
, 0, i
);
13426 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
13429 if (n_const
== n_elts
)
13430 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
13433 gcc_unreachable ();
13435 if (const_vec
!= NULL
13436 && simd_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
13437 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13439 else if (TARGET_HAVE_MVE
&& VALID_MVE_PRED_MODE(mode
))
13440 return mve_bool_vec_to_const (const_vec
);
13441 else if ((target
= neon_vdup_constant (vals
, generate
)) != NULL_RTX
)
13442 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13443 pipeline cycle; creating the constant takes one or two ARM
13444 pipeline cycles. */
13446 else if (const_vec
!= NULL_RTX
)
13447 /* Load from constant pool. On Cortex-A8 this takes two cycles
13448 (for either double or quad vectors). We cannot take advantage
13449 of single-cycle VLD1 because we need a PC-relative addressing
13451 return arm_disable_literal_pool
? NULL_RTX
: const_vec
;
13453 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13454 We cannot construct an initializer. */
13458 /* Initialize vector TARGET to VALS. */
13461 neon_expand_vector_init (rtx target
, rtx vals
)
13463 machine_mode mode
= GET_MODE (target
);
13464 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13465 int n_elts
= GET_MODE_NUNITS (mode
);
13466 int n_var
= 0, one_var
= -1;
13467 bool all_same
= true;
13471 for (i
= 0; i
< n_elts
; ++i
)
13473 x
= XVECEXP (vals
, 0, i
);
13474 if (!CONSTANT_P (x
))
13475 ++n_var
, one_var
= i
;
13477 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
13483 rtx constant
= neon_make_constant (vals
);
13484 if (constant
!= NULL_RTX
)
13486 emit_move_insn (target
, constant
);
13491 /* Splat a single non-constant element if we can. */
13492 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
13494 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
13495 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
13499 /* One field is non-constant. Load constant then overwrite varying
13500 field. This is more efficient than using the stack. */
13503 rtx copy
= copy_rtx (vals
);
13504 rtx merge_mask
= GEN_INT (1 << one_var
);
13506 /* Load constant part of vector, substitute neighboring value for
13507 varying element. */
13508 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
13509 neon_expand_vector_init (target
, copy
);
13511 /* Insert variable. */
13512 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
13513 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
13517 /* Construct the vector in memory one field at a time
13518 and load the whole vector. */
13519 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
13520 for (i
= 0; i
< n_elts
; i
++)
13521 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
13522 i
* GET_MODE_SIZE (inner_mode
)),
13523 XVECEXP (vals
, 0, i
));
13524 emit_move_insn (target
, mem
);
13527 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13528 ERR if it doesn't. EXP indicates the source location, which includes the
13529 inlining history for intrinsics. */
13532 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13533 const_tree exp
, const char *desc
)
13535 HOST_WIDE_INT lane
;
13537 gcc_assert (CONST_INT_P (operand
));
13539 lane
= INTVAL (operand
);
13541 if (lane
< low
|| lane
>= high
)
13544 error_at (EXPR_LOCATION (exp
),
13545 "%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13547 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13551 /* Bounds-check lanes. */
13554 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13557 bounds_check (operand
, low
, high
, exp
, "lane");
13560 /* Bounds-check constants. */
13563 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
13565 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
13569 neon_element_bits (machine_mode mode
)
13571 return GET_MODE_UNIT_BITSIZE (mode
);
13575 /* Predicates for `match_operand' and `match_operator'. */
13577 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13578 WB level is 2 if full writeback address modes are allowed, 1
13579 if limited writeback address modes (POST_INC and PRE_DEC) are
13580 allowed and 0 if no writeback at all is supported. */
13583 arm_coproc_mem_operand_wb (rtx op
, int wb_level
)
13585 gcc_assert (wb_level
== 0 || wb_level
== 1 || wb_level
== 2);
13588 /* Reject eliminable registers. */
13589 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
13590 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13591 || reg_mentioned_p (arg_pointer_rtx
, op
)
13592 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13593 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13594 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13595 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13598 /* Constants are converted into offsets from labels. */
13602 ind
= XEXP (op
, 0);
13604 if (reload_completed
13605 && (LABEL_REF_P (ind
)
13606 || (GET_CODE (ind
) == CONST
13607 && GET_CODE (XEXP (ind
, 0)) == PLUS
13608 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13609 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13612 /* Match: (mem (reg)). */
13614 return arm_address_register_rtx_p (ind
, 0);
13616 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
13617 acceptable in any case (subject to verification by
13618 arm_address_register_rtx_p). We need full writeback to accept
13619 PRE_INC and POST_DEC, and at least restricted writeback for
13620 PRE_INC and POST_DEC. */
13622 && (GET_CODE (ind
) == POST_INC
13623 || GET_CODE (ind
) == PRE_DEC
13625 && (GET_CODE (ind
) == PRE_INC
13626 || GET_CODE (ind
) == POST_DEC
))))
13627 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13630 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
13631 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
13632 && GET_CODE (XEXP (ind
, 1)) == PLUS
13633 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13634 ind
= XEXP (ind
, 1);
13640 The encoded immediate for 16-bit modes is multiplied by 2,
13641 while the encoded immediate for 32-bit and 64-bit modes is
13642 multiplied by 4. */
13643 int factor
= MIN (GET_MODE_SIZE (GET_MODE (op
)), 4);
13644 if (GET_CODE (ind
) == PLUS
13645 && REG_P (XEXP (ind
, 0))
13646 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13647 && CONST_INT_P (XEXP (ind
, 1))
13648 && IN_RANGE (INTVAL (XEXP (ind
, 1)), -255 * factor
, 255 * factor
)
13649 && (INTVAL (XEXP (ind
, 1)) & (factor
- 1)) == 0)
13655 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13656 WB is true if full writeback address modes are allowed and is false
13657 if limited writeback address modes (POST_INC and PRE_DEC) are
13660 int arm_coproc_mem_operand (rtx op
, bool wb
)
13662 return arm_coproc_mem_operand_wb (op
, wb
? 2 : 1);
13665 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13666 context in which no writeback address modes are allowed. */
13669 arm_coproc_mem_operand_no_writeback (rtx op
)
13671 return arm_coproc_mem_operand_wb (op
, 0);
13674 /* In non-STRICT mode, return the register number; in STRICT mode return
13675 the hard regno or the replacement if it won't be a mem. Otherwise, return
13676 the original pseudo number. */
13678 arm_effective_regno (rtx op
, bool strict
)
13680 gcc_assert (REG_P (op
));
13681 if (!strict
|| REGNO (op
) < FIRST_PSEUDO_REGISTER
13682 || !reg_renumber
|| reg_renumber
[REGNO (op
)] < 0)
13684 return reg_renumber
[REGNO (op
)];
13687 /* This function returns TRUE on matching mode and op.
13688 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13689 2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13). */
13691 mve_vector_mem_operand (machine_mode mode
, rtx op
, bool strict
)
13693 enum rtx_code code
;
13696 /* Match: (mem (reg)). */
13699 reg_no
= arm_effective_regno (op
, strict
);
13700 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13701 ? reg_no
<= LAST_LO_REGNUM
13702 : reg_no
< LAST_ARM_REGNUM
)
13703 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13705 code
= GET_CODE (op
);
13707 if ((code
== POST_INC
13710 || code
== POST_DEC
)
13711 && REG_P (XEXP (op
, 0)))
13713 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13714 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13715 ? reg_no
<= LAST_LO_REGNUM
13716 :(reg_no
< LAST_ARM_REGNUM
&& reg_no
!= SP_REGNUM
))
13717 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13719 else if (((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
13720 && GET_CODE (XEXP (op
, 1)) == PLUS
13721 && XEXP (op
, 0) == XEXP (XEXP (op
, 1), 0)
13722 && REG_P (XEXP (op
, 0))
13723 && GET_CODE (XEXP (XEXP (op
, 1), 1)) == CONST_INT
)
13724 /* Make sure to only accept PLUS after reload_completed, otherwise
13725 this will interfere with auto_inc's pattern detection. */
13726 || (reload_completed
&& code
== PLUS
&& REG_P (XEXP (op
, 0))
13727 && GET_CODE (XEXP (op
, 1)) == CONST_INT
))
13729 reg_no
= arm_effective_regno (XEXP (op
, 0), strict
);
13731 val
= INTVAL (XEXP (op
, 1));
13733 val
= INTVAL (XEXP(XEXP (op
, 1), 1));
13740 if (abs (val
) > 127)
13747 if (val
% 2 != 0 || abs (val
) > 254)
13752 if (val
% 4 != 0 || abs (val
) > 508)
13758 return ((!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
)
13759 || (MVE_STN_LDW_MODE (mode
)
13760 ? reg_no
<= LAST_LO_REGNUM
13761 : (reg_no
< LAST_ARM_REGNUM
13762 && (code
== PLUS
|| reg_no
!= SP_REGNUM
))));
13767 /* Return TRUE if OP is a memory operand which we can load or store a vector
13768 to/from. TYPE is one of the following values:
13769 0 - Vector load/stor (vldr)
13770 1 - Core registers (ldm)
13771 2 - Element/structure loads (vld1)
13774 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13778 /* Reject eliminable registers. */
13779 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13780 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13781 || reg_mentioned_p (arg_pointer_rtx
, op
)
13782 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13783 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13784 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13785 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13788 /* Constants are converted into offsets from labels. */
13792 ind
= XEXP (op
, 0);
13794 if (reload_completed
13795 && (LABEL_REF_P (ind
)
13796 || (GET_CODE (ind
) == CONST
13797 && GET_CODE (XEXP (ind
, 0)) == PLUS
13798 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13799 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13802 /* Match: (mem (reg)). */
13804 return arm_address_register_rtx_p (ind
, 0);
13806 /* Allow post-increment with Neon registers. */
13807 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13808 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13809 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13811 /* Allow post-increment by register for VLDn */
13812 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13813 && GET_CODE (XEXP (ind
, 1)) == PLUS
13814 && REG_P (XEXP (XEXP (ind
, 1), 1))
13815 && REG_P (XEXP (ind
, 0))
13816 && rtx_equal_p (XEXP (ind
, 0), XEXP (XEXP (ind
, 1), 0)))
13823 && GET_CODE (ind
) == PLUS
13824 && REG_P (XEXP (ind
, 0))
13825 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13826 && CONST_INT_P (XEXP (ind
, 1))
13827 && INTVAL (XEXP (ind
, 1)) > -1024
13828 /* For quad modes, we restrict the constant offset to be slightly less
13829 than what the instruction format permits. We have no such constraint
13830 on double mode offsets. (This must match arm_legitimate_index_p.) */
13831 && (INTVAL (XEXP (ind
, 1))
13832 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13833 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13839 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13842 mve_struct_mem_operand (rtx op
)
13844 rtx ind
= XEXP (op
, 0);
13846 /* Match: (mem (reg)). */
13848 return arm_address_register_rtx_p (ind
, 0);
13850 /* Allow only post-increment by the mode size. */
13851 if (GET_CODE (ind
) == POST_INC
)
13852 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13857 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13860 neon_struct_mem_operand (rtx op
)
13864 /* Reject eliminable registers. */
13865 if (! (reload_in_progress
|| reload_completed
)
13866 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13867 || reg_mentioned_p (arg_pointer_rtx
, op
)
13868 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13869 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13870 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13871 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13874 /* Constants are converted into offsets from labels. */
13878 ind
= XEXP (op
, 0);
13880 if (reload_completed
13881 && (LABEL_REF_P (ind
)
13882 || (GET_CODE (ind
) == CONST
13883 && GET_CODE (XEXP (ind
, 0)) == PLUS
13884 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13885 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13888 /* Match: (mem (reg)). */
13890 return arm_address_register_rtx_p (ind
, 0);
13892 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13893 if (GET_CODE (ind
) == POST_INC
13894 || GET_CODE (ind
) == PRE_DEC
)
13895 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13900 /* Prepares the operands for the VCMLA by lane instruction such that the right
13901 register number is selected. This instruction is special in that it always
13902 requires a D register, however there is a choice to be made between Dn[0],
13903 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13905 The VCMLA by lane function always selects two values. For instance given D0
13906 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13907 used by the instruction. However given V4SF then index 0 and 1 are valid as
13908 D0[0] or D1[0] are both valid.
13910 This function centralizes that information based on OPERANDS, OPERANDS[3]
13911 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13912 updated to contain the right index. */
13915 neon_vcmla_lane_prepare_operands (rtx
*operands
)
13917 int lane
= INTVAL (operands
[4]);
13918 machine_mode constmode
= SImode
;
13919 machine_mode mode
= GET_MODE (operands
[3]);
13920 int regno
= REGNO (operands
[3]);
13921 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
13922 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
13924 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
13926 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
13930 operands
[3] = gen_int_mode (regno
, constmode
);
13931 operands
[4] = gen_int_mode (lane
, constmode
);
13937 /* Return true if X is a register that will be eliminated later on. */
13939 arm_eliminable_register (rtx x
)
13941 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13942 || REGNO (x
) == ARG_POINTER_REGNUM
13943 || VIRTUAL_REGISTER_P (x
));
13946 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13947 coprocessor registers. Otherwise return NO_REGS. */
13950 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13952 if (mode
== HFmode
)
13954 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
13955 return GENERAL_REGS
;
13956 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13958 return GENERAL_REGS
;
13961 /* The neon move patterns handle all legitimate vector and struct
13964 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13965 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13966 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13967 || VALID_NEON_STRUCT_MODE (mode
)))
13970 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13973 return GENERAL_REGS
;
13976 /* Values which must be returned in the most-significant end of the return
13980 arm_return_in_msb (const_tree valtype
)
13982 return (TARGET_AAPCS_BASED
13983 && BYTES_BIG_ENDIAN
13984 && (AGGREGATE_TYPE_P (valtype
)
13985 || TREE_CODE (valtype
) == COMPLEX_TYPE
13986 || FIXED_POINT_TYPE_P (valtype
)));
13989 /* Return TRUE if X references a SYMBOL_REF. */
13991 symbol_mentioned_p (rtx x
)
13996 if (SYMBOL_REF_P (x
))
13999 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14000 are constant offsets, not symbols. */
14001 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14004 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14006 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14012 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14013 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
14016 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
14023 /* Return TRUE if X references a LABEL_REF. */
14025 label_mentioned_p (rtx x
)
14030 if (LABEL_REF_P (x
))
14033 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14034 instruction, but they are constant offsets, not symbols. */
14035 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
14038 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
14039 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
14045 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
14046 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
14049 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
14057 tls_mentioned_p (rtx x
)
14059 switch (GET_CODE (x
))
14062 return tls_mentioned_p (XEXP (x
, 0));
14065 if (XINT (x
, 1) == UNSPEC_TLS
)
14068 /* Fall through. */
14074 /* Must not copy any rtx that uses a pc-relative address.
14075 Also, disallow copying of load-exclusive instructions that
14076 may appear after splitting of compare-and-swap-style operations
14077 so as to prevent those loops from being transformed away from their
14078 canonical forms (see PR 69904). */
14081 arm_cannot_copy_insn_p (rtx_insn
*insn
)
14083 /* The tls call insn cannot be copied, as it is paired with a data
14085 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
14088 subrtx_iterator::array_type array
;
14089 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
14091 const_rtx x
= *iter
;
14092 if (GET_CODE (x
) == UNSPEC
14093 && (XINT (x
, 1) == UNSPEC_PIC_BASE
14094 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
14098 rtx set
= single_set (insn
);
14101 rtx src
= SET_SRC (set
);
14102 if (GET_CODE (src
) == ZERO_EXTEND
)
14103 src
= XEXP (src
, 0);
14105 /* Catch the load-exclusive and load-acquire operations. */
14106 if (GET_CODE (src
) == UNSPEC_VOLATILE
14107 && (XINT (src
, 1) == VUNSPEC_LL
14108 || XINT (src
, 1) == VUNSPEC_LAX
))
14115 minmax_code (rtx x
)
14117 enum rtx_code code
= GET_CODE (x
);
14130 gcc_unreachable ();
14134 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14137 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
14138 int *mask
, bool *signed_sat
)
14140 /* The high bound must be a power of two minus one. */
14141 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
14145 /* The low bound is either zero (for usat) or one less than the
14146 negation of the high bound (for ssat). */
14147 if (INTVAL (lo_bound
) == 0)
14152 *signed_sat
= false;
14157 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
14162 *signed_sat
= true;
14170 /* Return 1 if memory locations are adjacent. */
14172 adjacent_mem_locations (rtx a
, rtx b
)
14174 /* We don't guarantee to preserve the order of these memory refs. */
14175 if (volatile_refs_p (a
) || volatile_refs_p (b
))
14178 if ((REG_P (XEXP (a
, 0))
14179 || (GET_CODE (XEXP (a
, 0)) == PLUS
14180 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
14181 && (REG_P (XEXP (b
, 0))
14182 || (GET_CODE (XEXP (b
, 0)) == PLUS
14183 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
14185 HOST_WIDE_INT val0
= 0, val1
= 0;
14189 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
14191 reg0
= XEXP (XEXP (a
, 0), 0);
14192 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
14195 reg0
= XEXP (a
, 0);
14197 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
14199 reg1
= XEXP (XEXP (b
, 0), 0);
14200 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
14203 reg1
= XEXP (b
, 0);
14205 /* Don't accept any offset that will require multiple
14206 instructions to handle, since this would cause the
14207 arith_adjacentmem pattern to output an overlong sequence. */
14208 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
14211 /* Don't allow an eliminable register: register elimination can make
14212 the offset too large. */
14213 if (arm_eliminable_register (reg0
))
14216 val_diff
= val1
- val0
;
14220 /* If the target has load delay slots, then there's no benefit
14221 to using an ldm instruction unless the offset is zero and
14222 we are optimizing for size. */
14223 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
14224 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
14225 && (val_diff
== 4 || val_diff
== -4));
14228 return ((REGNO (reg0
) == REGNO (reg1
))
14229 && (val_diff
== 4 || val_diff
== -4));
14235 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14236 for load operations, false for store operations. CONSECUTIVE is true
14237 if the register numbers in the operation must be consecutive in the register
14238 bank. RETURN_PC is true if value is to be loaded in PC.
14239 The pattern we are trying to match for load is:
14240 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14241 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14244 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14247 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14248 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14249 3. If consecutive is TRUE, then for kth register being loaded,
14250 REGNO (R_dk) = REGNO (R_d0) + k.
14251 The pattern for store is similar. */
14253 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
14254 bool consecutive
, bool return_pc
)
14256 HOST_WIDE_INT count
= XVECLEN (op
, 0);
14257 rtx reg
, mem
, addr
;
14259 unsigned first_regno
;
14260 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
14262 bool addr_reg_in_reglist
= false;
14263 bool update
= false;
14268 /* If not in SImode, then registers must be consecutive
14269 (e.g., VLDM instructions for DFmode). */
14270 gcc_assert ((mode
== SImode
) || consecutive
);
14271 /* Setting return_pc for stores is illegal. */
14272 gcc_assert (!return_pc
|| load
);
14274 /* Set up the increments and the regs per val based on the mode. */
14275 reg_increment
= GET_MODE_SIZE (mode
);
14276 regs_per_val
= reg_increment
/ 4;
14277 offset_adj
= return_pc
? 1 : 0;
14280 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
14281 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
14284 /* Check if this is a write-back. */
14285 elt
= XVECEXP (op
, 0, offset_adj
);
14286 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
14292 /* The offset adjustment must be the number of registers being
14293 popped times the size of a single register. */
14294 if (!REG_P (SET_DEST (elt
))
14295 || !REG_P (XEXP (SET_SRC (elt
), 0))
14296 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
14297 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
14298 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
14299 ((count
- 1 - offset_adj
) * reg_increment
))
14303 i
= i
+ offset_adj
;
14304 base
= base
+ offset_adj
;
14305 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14306 success depends on the type: VLDM can do just one reg,
14307 LDM must do at least two. */
14308 if ((count
<= i
) && (mode
== SImode
))
14311 elt
= XVECEXP (op
, 0, i
- 1);
14312 if (GET_CODE (elt
) != SET
)
14317 reg
= SET_DEST (elt
);
14318 mem
= SET_SRC (elt
);
14322 reg
= SET_SRC (elt
);
14323 mem
= SET_DEST (elt
);
14326 if (!REG_P (reg
) || !MEM_P (mem
))
14329 regno
= REGNO (reg
);
14330 first_regno
= regno
;
14331 addr
= XEXP (mem
, 0);
14332 if (GET_CODE (addr
) == PLUS
)
14334 if (!CONST_INT_P (XEXP (addr
, 1)))
14337 offset
= INTVAL (XEXP (addr
, 1));
14338 addr
= XEXP (addr
, 0);
14344 /* Don't allow SP to be loaded unless it is also the base register. It
14345 guarantees that SP is reset correctly when an LDM instruction
14346 is interrupted. Otherwise, we might end up with a corrupt stack. */
14347 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14350 if (regno
== REGNO (addr
))
14351 addr_reg_in_reglist
= true;
14353 for (; i
< count
; i
++)
14355 elt
= XVECEXP (op
, 0, i
);
14356 if (GET_CODE (elt
) != SET
)
14361 reg
= SET_DEST (elt
);
14362 mem
= SET_SRC (elt
);
14366 reg
= SET_SRC (elt
);
14367 mem
= SET_DEST (elt
);
14371 || GET_MODE (reg
) != mode
14372 || REGNO (reg
) <= regno
14375 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
14376 /* Don't allow SP to be loaded unless it is also the base register. It
14377 guarantees that SP is reset correctly when an LDM instruction
14378 is interrupted. Otherwise, we might end up with a corrupt stack. */
14379 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
14381 || GET_MODE (mem
) != mode
14382 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
14383 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
14384 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
14385 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
14386 offset
+ (i
- base
) * reg_increment
))
14387 && (!REG_P (XEXP (mem
, 0))
14388 || offset
+ (i
- base
) * reg_increment
!= 0)))
14391 regno
= REGNO (reg
);
14392 if (regno
== REGNO (addr
))
14393 addr_reg_in_reglist
= true;
14398 if (update
&& addr_reg_in_reglist
)
14401 /* For Thumb-1, address register is always modified - either by write-back
14402 or by explicit load. If the pattern does not describe an update,
14403 then the address register must be in the list of loaded registers. */
14405 return update
|| addr_reg_in_reglist
;
14411 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14412 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14415 [(set (reg:SI <N>) (const_int 0))
14416 (set (reg:SI <M>) (const_int 0))
14418 (unspec_volatile [(const_int 0)]
14420 (clobber (reg:CC CC_REGNUM))
14423 Any number (including 0) of set expressions is valid, the volatile unspec is
14424 optional. All registers but SP and PC are allowed and registers must be in
14425 strict increasing order.
14427 To be a valid VSCCLRM pattern, OP must have the following form:
14429 [(unspec_volatile [(const_int 0)]
14430 VUNSPEC_VSCCLRM_VPR)
14431 (set (reg:SF <N>) (const_int 0))
14432 (set (reg:SF <M>) (const_int 0))
14436 As with CLRM, any number (including 0) of set expressions is valid, however
14437 the volatile unspec is mandatory here. Any VFP single-precision register is
14438 accepted but all registers must be consecutive and in increasing order. */
14441 clear_operation_p (rtx op
, bool vfp
)
14444 unsigned last_regno
= INVALID_REGNUM
;
14445 rtx elt
, reg
, zero
;
14446 int count
= XVECLEN (op
, 0);
14447 int first_set
= vfp
? 1 : 0;
14448 machine_mode expected_mode
= vfp
? E_SFmode
: E_SImode
;
14450 for (int i
= first_set
; i
< count
; i
++)
14452 elt
= XVECEXP (op
, 0, i
);
14454 if (!vfp
&& GET_CODE (elt
) == UNSPEC_VOLATILE
)
14456 if (XINT (elt
, 1) != VUNSPEC_CLRM_APSR
14457 || XVECLEN (elt
, 0) != 1
14458 || XVECEXP (elt
, 0, 0) != CONST0_RTX (SImode
)
14465 if (GET_CODE (elt
) == CLOBBER
)
14468 if (GET_CODE (elt
) != SET
)
14471 reg
= SET_DEST (elt
);
14472 zero
= SET_SRC (elt
);
14475 || GET_MODE (reg
) != expected_mode
14476 || zero
!= CONST0_RTX (SImode
))
14479 regno
= REGNO (reg
);
14483 if (i
!= first_set
&& regno
!= last_regno
+ 1)
14488 if (regno
== SP_REGNUM
|| regno
== PC_REGNUM
)
14490 if (i
!= first_set
&& regno
<= last_regno
)
14494 last_regno
= regno
;
14500 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14501 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14502 instruction. ADD_OFFSET is nonzero if the base address register needs
14503 to be modified with an add instruction before we can use it. */
14506 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
14507 int nops
, HOST_WIDE_INT add_offset
)
14509 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14510 if the offset isn't small enough. The reason 2 ldrs are faster
14511 is because these ARMs are able to do more than one cache access
14512 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14513 whilst the ARM8 has a double bandwidth cache. This means that
14514 these cores can do both an instruction fetch and a data fetch in
14515 a single cycle, so the trick of calculating the address into a
14516 scratch register (one of the result regs) and then doing a load
14517 multiple actually becomes slower (and no smaller in code size).
14518 That is the transformation
14520 ldr rd1, [rbase + offset]
14521 ldr rd2, [rbase + offset + 4]
14525 add rd1, rbase, offset
14526 ldmia rd1, {rd1, rd2}
14528 produces worse code -- '3 cycles + any stalls on rd2' instead of
14529 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14530 access per cycle, the first sequence could never complete in less
14531 than 6 cycles, whereas the ldm sequence would only take 5 and
14532 would make better use of sequential accesses if not hitting the
14535 We cheat here and test 'arm_ld_sched' which we currently know to
14536 only be true for the ARM8, ARM9 and StrongARM. If this ever
14537 changes, then the test below needs to be reworked. */
14538 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
14541 /* XScale has load-store double instructions, but they have stricter
14542 alignment requirements than load-store multiple, so we cannot
14545 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14546 the pipeline until completion.
14554 An ldr instruction takes 1-3 cycles, but does not block the
14563 Best case ldr will always win. However, the more ldr instructions
14564 we issue, the less likely we are to be able to schedule them well.
14565 Using ldr instructions also increases code size.
14567 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14568 for counts of 3 or 4 regs. */
14569 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
14574 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14575 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14576 an array ORDER which describes the sequence to use when accessing the
14577 offsets that produces an ascending order. In this sequence, each
14578 offset must be larger by exactly 4 than the previous one. ORDER[0]
14579 must have been filled in with the lowest offset by the caller.
14580 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14581 we use to verify that ORDER produces an ascending order of registers.
14582 Return true if it was possible to construct such an order, false if
14586 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
14587 int *unsorted_regs
)
14590 for (i
= 1; i
< nops
; i
++)
14594 order
[i
] = order
[i
- 1];
14595 for (j
= 0; j
< nops
; j
++)
14596 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
14598 /* We must find exactly one offset that is higher than the
14599 previous one by 4. */
14600 if (order
[i
] != order
[i
- 1])
14604 if (order
[i
] == order
[i
- 1])
14606 /* The register numbers must be ascending. */
14607 if (unsorted_regs
!= NULL
14608 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
14614 /* Used to determine in a peephole whether a sequence of load
14615 instructions can be changed into a load-multiple instruction.
14616 NOPS is the number of separate load instructions we are examining. The
14617 first NOPS entries in OPERANDS are the destination registers, the
14618 next NOPS entries are memory operands. If this function is
14619 successful, *BASE is set to the common base register of the memory
14620 accesses; *LOAD_OFFSET is set to the first memory location's offset
14621 from that base register.
14622 REGS is an array filled in with the destination register numbers.
14623 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14624 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14625 the sequence of registers in REGS matches the loads from ascending memory
14626 locations, and the function verifies that the register numbers are
14627 themselves ascending. If CHECK_REGS is false, the register numbers
14628 are stored in the order they are found in the operands. */
14630 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
14631 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
14633 int unsorted_regs
[MAX_LDM_STM_OPS
];
14634 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
14635 int order
[MAX_LDM_STM_OPS
];
14639 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14640 easily extended if required. */
14641 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14643 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14645 /* Loop over the operands and check that the memory references are
14646 suitable (i.e. immediate offsets from the same base register). At
14647 the same time, extract the target register, and the memory
14649 for (i
= 0; i
< nops
; i
++)
14654 /* Convert a subreg of a mem into the mem itself. */
14655 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14656 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14658 gcc_assert (MEM_P (operands
[nops
+ i
]));
14660 /* Don't reorder volatile memory references; it doesn't seem worth
14661 looking for the case where the order is ok anyway. */
14662 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14665 offset
= const0_rtx
;
14667 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14669 && REG_P (reg
= SUBREG_REG (reg
))))
14670 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14671 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14673 && REG_P (reg
= SUBREG_REG (reg
))))
14674 && (CONST_INT_P (offset
14675 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14679 base_reg
= REGNO (reg
);
14680 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14683 else if (base_reg
!= (int) REGNO (reg
))
14684 /* Not addressed from the same base register. */
14687 unsorted_regs
[i
] = (REG_P (operands
[i
])
14688 ? REGNO (operands
[i
])
14689 : REGNO (SUBREG_REG (operands
[i
])));
14691 /* If it isn't an integer register, or if it overwrites the
14692 base register but isn't the last insn in the list, then
14693 we can't do this. */
14694 if (unsorted_regs
[i
] < 0
14695 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14696 || unsorted_regs
[i
] > 14
14697 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
14700 /* Don't allow SP to be loaded unless it is also the base
14701 register. It guarantees that SP is reset correctly when
14702 an LDM instruction is interrupted. Otherwise, we might
14703 end up with a corrupt stack. */
14704 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
14707 unsorted_offsets
[i
] = INTVAL (offset
);
14708 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14712 /* Not a suitable memory address. */
14716 /* All the useful information has now been extracted from the
14717 operands into unsorted_regs and unsorted_offsets; additionally,
14718 order[0] has been set to the lowest offset in the list. Sort
14719 the offsets into order, verifying that they are adjacent, and
14720 check that the register numbers are ascending. */
14721 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14722 check_regs
? unsorted_regs
: NULL
))
14726 memcpy (saved_order
, order
, sizeof order
);
14732 for (i
= 0; i
< nops
; i
++)
14733 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14735 *load_offset
= unsorted_offsets
[order
[0]];
14738 if (unsorted_offsets
[order
[0]] == 0)
14739 ldm_case
= 1; /* ldmia */
14740 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14741 ldm_case
= 2; /* ldmib */
14742 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14743 ldm_case
= 3; /* ldmda */
14744 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14745 ldm_case
= 4; /* ldmdb */
14746 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
14747 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
14752 if (!multiple_operation_profitable_p (false, nops
,
14754 ? unsorted_offsets
[order
[0]] : 0))
14760 /* Used to determine in a peephole whether a sequence of store instructions can
14761 be changed into a store-multiple instruction.
14762 NOPS is the number of separate store instructions we are examining.
14763 NOPS_TOTAL is the total number of instructions recognized by the peephole
14765 The first NOPS entries in OPERANDS are the source registers, the next
14766 NOPS entries are memory operands. If this function is successful, *BASE is
14767 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14768 to the first memory location's offset from that base register. REGS is an
14769 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14770 likewise filled with the corresponding rtx's.
14771 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
14772 numbers to an ascending order of stores.
14773 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14774 from ascending memory locations, and the function verifies that the register
14775 numbers are themselves ascending. If CHECK_REGS is false, the register
14776 numbers are stored in the order they are found in the operands. */
14778 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
14779 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
14780 HOST_WIDE_INT
*load_offset
, bool check_regs
)
14782 int unsorted_regs
[MAX_LDM_STM_OPS
];
14783 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
14784 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
14785 int order
[MAX_LDM_STM_OPS
];
14787 rtx base_reg_rtx
= NULL
;
14790 /* Write back of base register is currently only supported for Thumb 1. */
14791 int base_writeback
= TARGET_THUMB1
;
14793 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14794 easily extended if required. */
14795 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
14797 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
14799 /* Loop over the operands and check that the memory references are
14800 suitable (i.e. immediate offsets from the same base register). At
14801 the same time, extract the target register, and the memory
14803 for (i
= 0; i
< nops
; i
++)
14808 /* Convert a subreg of a mem into the mem itself. */
14809 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
14810 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
14812 gcc_assert (MEM_P (operands
[nops
+ i
]));
14814 /* Don't reorder volatile memory references; it doesn't seem worth
14815 looking for the case where the order is ok anyway. */
14816 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
14819 offset
= const0_rtx
;
14821 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
14823 && REG_P (reg
= SUBREG_REG (reg
))))
14824 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
14825 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
14827 && REG_P (reg
= SUBREG_REG (reg
))))
14828 && (CONST_INT_P (offset
14829 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
14831 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
14832 ? operands
[i
] : SUBREG_REG (operands
[i
]));
14833 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
14837 base_reg
= REGNO (reg
);
14838 base_reg_rtx
= reg
;
14839 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14842 else if (base_reg
!= (int) REGNO (reg
))
14843 /* Not addressed from the same base register. */
14846 /* If it isn't an integer register, then we can't do this. */
14847 if (unsorted_regs
[i
] < 0
14848 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14849 /* The effects are unpredictable if the base register is
14850 both updated and stored. */
14851 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
14852 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
14853 || unsorted_regs
[i
] > 14)
14856 unsorted_offsets
[i
] = INTVAL (offset
);
14857 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14861 /* Not a suitable memory address. */
14865 /* All the useful information has now been extracted from the
14866 operands into unsorted_regs and unsorted_offsets; additionally,
14867 order[0] has been set to the lowest offset in the list. Sort
14868 the offsets into order, verifying that they are adjacent, and
14869 check that the register numbers are ascending. */
14870 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14871 check_regs
? unsorted_regs
: NULL
))
14875 memcpy (saved_order
, order
, sizeof order
);
14881 for (i
= 0; i
< nops
; i
++)
14883 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14885 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
14888 *load_offset
= unsorted_offsets
[order
[0]];
14892 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14895 if (unsorted_offsets
[order
[0]] == 0)
14896 stm_case
= 1; /* stmia */
14897 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14898 stm_case
= 2; /* stmib */
14899 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14900 stm_case
= 3; /* stmda */
14901 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14902 stm_case
= 4; /* stmdb */
14906 if (!multiple_operation_profitable_p (false, nops
, 0))
14912 /* Routines for use in generating RTL. */
14914 /* Generate a load-multiple instruction. COUNT is the number of loads in
14915 the instruction; REGS and MEMS are arrays containing the operands.
14916 BASEREG is the base register to be used in addressing the memory operands.
14917 WBACK_OFFSET is nonzero if the instruction should update the base
14921 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14922 HOST_WIDE_INT wback_offset
)
14927 if (!multiple_operation_profitable_p (false, count
, 0))
14933 for (i
= 0; i
< count
; i
++)
14934 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14936 if (wback_offset
!= 0)
14937 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14939 seq
= get_insns ();
14945 result
= gen_rtx_PARALLEL (VOIDmode
,
14946 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14947 if (wback_offset
!= 0)
14949 XVECEXP (result
, 0, 0)
14950 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14955 for (j
= 0; i
< count
; i
++, j
++)
14956 XVECEXP (result
, 0, i
)
14957 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14962 /* Generate a store-multiple instruction. COUNT is the number of stores in
14963 the instruction; REGS and MEMS are arrays containing the operands.
14964 BASEREG is the base register to be used in addressing the memory operands.
14965 WBACK_OFFSET is nonzero if the instruction should update the base
14969 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14970 HOST_WIDE_INT wback_offset
)
14975 if (GET_CODE (basereg
) == PLUS
)
14976 basereg
= XEXP (basereg
, 0);
14978 if (!multiple_operation_profitable_p (false, count
, 0))
14984 for (i
= 0; i
< count
; i
++)
14985 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14987 if (wback_offset
!= 0)
14988 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14990 seq
= get_insns ();
14996 result
= gen_rtx_PARALLEL (VOIDmode
,
14997 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14998 if (wback_offset
!= 0)
15000 XVECEXP (result
, 0, 0)
15001 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
15006 for (j
= 0; i
< count
; i
++, j
++)
15007 XVECEXP (result
, 0, i
)
15008 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
15013 /* Generate either a load-multiple or a store-multiple instruction. This
15014 function can be used in situations where we can start with a single MEM
15015 rtx and adjust its address upwards.
15016 COUNT is the number of operations in the instruction, not counting a
15017 possible update of the base register. REGS is an array containing the
15019 BASEREG is the base register to be used in addressing the memory operands,
15020 which are constructed from BASEMEM.
15021 WRITE_BACK specifies whether the generated instruction should include an
15022 update of the base register.
15023 OFFSETP is used to pass an offset to and from this function; this offset
15024 is not used when constructing the address (instead BASEMEM should have an
15025 appropriate offset in its address), it is used only for setting
15026 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
15029 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
15030 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
15032 rtx mems
[MAX_LDM_STM_OPS
];
15033 HOST_WIDE_INT offset
= *offsetp
;
15036 gcc_assert (count
<= MAX_LDM_STM_OPS
);
15038 if (GET_CODE (basereg
) == PLUS
)
15039 basereg
= XEXP (basereg
, 0);
15041 for (i
= 0; i
< count
; i
++)
15043 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
15044 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
15052 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
15053 write_back
? 4 * count
: 0);
15055 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
15056 write_back
? 4 * count
: 0);
15060 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
15061 rtx basemem
, HOST_WIDE_INT
*offsetp
)
15063 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
15068 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
15069 rtx basemem
, HOST_WIDE_INT
*offsetp
)
15071 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
15075 /* Called from a peephole2 expander to turn a sequence of loads into an
15076 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15077 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15078 is true if we can reorder the registers because they are used commutatively
15080 Returns true iff we could generate a new instruction. */
15083 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
15085 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15086 rtx mems
[MAX_LDM_STM_OPS
];
15087 int i
, j
, base_reg
;
15089 HOST_WIDE_INT offset
;
15090 int write_back
= FALSE
;
15094 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
15095 &base_reg
, &offset
, !sort_regs
);
15101 for (i
= 0; i
< nops
- 1; i
++)
15102 for (j
= i
+ 1; j
< nops
; j
++)
15103 if (regs
[i
] > regs
[j
])
15109 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15113 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
15115 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15117 for (i
= 0; i
< nops
; i
++)
15118 if (base_reg
== regs
[i
])
15119 write_back
= false;
15121 /* Ensure the base is dead if it is updated. */
15122 if (write_back
&& !peep2_reg_dead_p (nops
, base_reg_rtx
))
15128 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
15129 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
15131 base_reg_rtx
= newbase
;
15134 for (i
= 0; i
< nops
; i
++)
15136 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15137 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15140 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15141 write_back
? offset
+ i
* 4 : 0));
15145 /* Called from a peephole2 expander to turn a sequence of stores into an
15146 STM instruction. OPERANDS are the operands found by the peephole matcher;
15147 NOPS indicates how many separate stores we are trying to combine.
15148 Returns true iff we could generate a new instruction. */
15151 gen_stm_seq (rtx
*operands
, int nops
)
15154 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15155 rtx mems
[MAX_LDM_STM_OPS
];
15158 HOST_WIDE_INT offset
;
15159 int write_back
= FALSE
;
15162 bool base_reg_dies
;
15164 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
15165 mem_order
, &base_reg
, &offset
, true);
15170 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15172 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
15175 gcc_assert (base_reg_dies
);
15181 gcc_assert (base_reg_dies
);
15182 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15186 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15188 for (i
= 0; i
< nops
; i
++)
15190 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15191 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15194 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
15195 write_back
? offset
+ i
* 4 : 0));
15199 /* Called from a peephole2 expander to turn a sequence of stores that are
15200 preceded by constant loads into an STM instruction. OPERANDS are the
15201 operands found by the peephole matcher; NOPS indicates how many
15202 separate stores we are trying to combine; there are 2 * NOPS
15203 instructions in the peephole.
15204 Returns true iff we could generate a new instruction. */
15207 gen_const_stm_seq (rtx
*operands
, int nops
)
15209 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
15210 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15211 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
15212 rtx mems
[MAX_LDM_STM_OPS
];
15215 HOST_WIDE_INT offset
;
15216 int write_back
= FALSE
;
15219 bool base_reg_dies
;
15221 HARD_REG_SET allocated
;
15223 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
15224 mem_order
, &base_reg
, &offset
, false);
15229 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
15231 /* If the same register is used more than once, try to find a free
15233 CLEAR_HARD_REG_SET (allocated
);
15234 for (i
= 0; i
< nops
; i
++)
15236 for (j
= i
+ 1; j
< nops
; j
++)
15237 if (regs
[i
] == regs
[j
])
15239 rtx t
= peep2_find_free_register (0, nops
* 2,
15240 TARGET_THUMB1
? "l" : "r",
15241 SImode
, &allocated
);
15245 regs
[i
] = REGNO (t
);
15249 /* Compute an ordering that maps the register numbers to an ascending
15252 for (i
= 0; i
< nops
; i
++)
15253 if (regs
[i
] < regs
[reg_order
[0]])
15256 for (i
= 1; i
< nops
; i
++)
15258 int this_order
= reg_order
[i
- 1];
15259 for (j
= 0; j
< nops
; j
++)
15260 if (regs
[j
] > regs
[reg_order
[i
- 1]]
15261 && (this_order
== reg_order
[i
- 1]
15262 || regs
[j
] < regs
[this_order
]))
15264 reg_order
[i
] = this_order
;
15267 /* Ensure that registers that must be live after the instruction end
15268 up with the correct value. */
15269 for (i
= 0; i
< nops
; i
++)
15271 int this_order
= reg_order
[i
];
15272 if ((this_order
!= mem_order
[i
]
15273 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
15274 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
15278 /* Load the constants. */
15279 for (i
= 0; i
< nops
; i
++)
15281 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
15282 sorted_regs
[i
] = regs
[reg_order
[i
]];
15283 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
15286 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15288 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
15291 gcc_assert (base_reg_dies
);
15297 gcc_assert (base_reg_dies
);
15298 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15302 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15304 for (i
= 0; i
< nops
; i
++)
15306 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15307 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15310 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
15311 write_back
? offset
+ i
* 4 : 0));
15315 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15316 unaligned copies on processors which support unaligned semantics for those
15317 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15318 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15319 An interleave factor of 1 (the minimum) will perform no interleaving.
15320 Load/store multiple are used for aligned addresses where possible. */
15323 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
15324 HOST_WIDE_INT length
,
15325 unsigned int interleave_factor
)
15327 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
15328 int *regnos
= XALLOCAVEC (int, interleave_factor
);
15329 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
15330 HOST_WIDE_INT i
, j
;
15331 HOST_WIDE_INT remaining
= length
, words
;
15332 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
15334 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
15335 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
15336 HOST_WIDE_INT srcoffset
, dstoffset
;
15337 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
15340 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
15342 /* Use hard registers if we have aligned source or destination so we can use
15343 load/store multiple with contiguous registers. */
15344 if (dst_aligned
|| src_aligned
)
15345 for (i
= 0; i
< interleave_factor
; i
++)
15346 regs
[i
] = gen_rtx_REG (SImode
, i
);
15348 for (i
= 0; i
< interleave_factor
; i
++)
15349 regs
[i
] = gen_reg_rtx (SImode
);
15351 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
15352 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
15354 srcoffset
= dstoffset
= 0;
15356 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15357 For copying the last bytes we want to subtract this offset again. */
15358 src_autoinc
= dst_autoinc
= 0;
15360 for (i
= 0; i
< interleave_factor
; i
++)
15363 /* Copy BLOCK_SIZE_BYTES chunks. */
15365 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
15368 if (src_aligned
&& interleave_factor
> 1)
15370 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
15371 TRUE
, srcbase
, &srcoffset
));
15372 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15376 for (j
= 0; j
< interleave_factor
; j
++)
15378 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
15380 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15381 srcoffset
+ j
* UNITS_PER_WORD
);
15382 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15384 srcoffset
+= block_size_bytes
;
15388 if (dst_aligned
&& interleave_factor
> 1)
15390 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
15391 TRUE
, dstbase
, &dstoffset
));
15392 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15396 for (j
= 0; j
< interleave_factor
; j
++)
15398 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
15400 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15401 dstoffset
+ j
* UNITS_PER_WORD
);
15402 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15404 dstoffset
+= block_size_bytes
;
15407 remaining
-= block_size_bytes
;
15410 /* Copy any whole words left (note these aren't interleaved with any
15411 subsequent halfword/byte load/stores in the interests of simplicity). */
15413 words
= remaining
/ UNITS_PER_WORD
;
15415 gcc_assert (words
< interleave_factor
);
15417 if (src_aligned
&& words
> 1)
15419 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
15421 src_autoinc
+= UNITS_PER_WORD
* words
;
15425 for (j
= 0; j
< words
; j
++)
15427 addr
= plus_constant (Pmode
, src
,
15428 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
15429 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15430 srcoffset
+ j
* UNITS_PER_WORD
);
15432 emit_move_insn (regs
[j
], mem
);
15434 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15436 srcoffset
+= words
* UNITS_PER_WORD
;
15439 if (dst_aligned
&& words
> 1)
15441 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
15443 dst_autoinc
+= words
* UNITS_PER_WORD
;
15447 for (j
= 0; j
< words
; j
++)
15449 addr
= plus_constant (Pmode
, dst
,
15450 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
15451 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15452 dstoffset
+ j
* UNITS_PER_WORD
);
15454 emit_move_insn (mem
, regs
[j
]);
15456 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15458 dstoffset
+= words
* UNITS_PER_WORD
;
15461 remaining
-= words
* UNITS_PER_WORD
;
15463 gcc_assert (remaining
< 4);
15465 /* Copy a halfword if necessary. */
15467 if (remaining
>= 2)
15469 halfword_tmp
= gen_reg_rtx (SImode
);
15471 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15472 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
15473 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
15475 /* Either write out immediately, or delay until we've loaded the last
15476 byte, depending on interleave factor. */
15477 if (interleave_factor
== 1)
15479 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15480 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15481 emit_insn (gen_unaligned_storehi (mem
,
15482 gen_lowpart (HImode
, halfword_tmp
)));
15483 halfword_tmp
= NULL
;
15491 gcc_assert (remaining
< 2);
15493 /* Copy last byte. */
15495 if ((remaining
& 1) != 0)
15497 byte_tmp
= gen_reg_rtx (SImode
);
15499 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15500 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
15501 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
15503 if (interleave_factor
== 1)
15505 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15506 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15507 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15516 /* Store last halfword if we haven't done so already. */
15520 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15521 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15522 emit_insn (gen_unaligned_storehi (mem
,
15523 gen_lowpart (HImode
, halfword_tmp
)));
15527 /* Likewise for last byte. */
15531 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15532 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15533 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15537 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
15540 /* From mips_adjust_block_mem:
15542 Helper function for doing a loop-based block operation on memory
15543 reference MEM. Each iteration of the loop will operate on LENGTH
15546 Create a new base register for use within the loop and point it to
15547 the start of MEM. Create a new memory reference that uses this
15548 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15551 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
15554 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
15556 /* Although the new mem does not refer to a known location,
15557 it does keep up to LENGTH bytes of alignment. */
15558 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
15559 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
15562 /* From mips_block_move_loop:
15564 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15565 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15566 the memory regions do not overlap. */
15569 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
15570 unsigned int interleave_factor
,
15571 HOST_WIDE_INT bytes_per_iter
)
15573 rtx src_reg
, dest_reg
, final_src
, test
;
15574 HOST_WIDE_INT leftover
;
15576 leftover
= length
% bytes_per_iter
;
15577 length
-= leftover
;
15579 /* Create registers and memory references for use within the loop. */
15580 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
15581 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
15583 /* Calculate the value that SRC_REG should have after the last iteration of
15585 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
15586 0, 0, OPTAB_WIDEN
);
15588 /* Emit the start of the loop. */
15589 rtx_code_label
*label
= gen_label_rtx ();
15590 emit_label (label
);
15592 /* Emit the loop body. */
15593 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
15594 interleave_factor
);
15596 /* Move on to the next block. */
15597 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
15598 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
15600 /* Emit the loop condition. */
15601 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
15602 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
15604 /* Mop up any left-over bytes. */
15606 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
15609 /* Emit a block move when either the source or destination is unaligned (not
15610 aligned to a four-byte boundary). This may need further tuning depending on
15611 core type, optimize_size setting, etc. */
15614 arm_cpymemqi_unaligned (rtx
*operands
)
15616 HOST_WIDE_INT length
= INTVAL (operands
[2]);
15620 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
15621 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
15622 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15623 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15624 or dst_aligned though: allow more interleaving in those cases since the
15625 resulting code can be smaller. */
15626 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
15627 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
15630 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
15631 interleave_factor
, bytes_per_iter
);
15633 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
15634 interleave_factor
);
15638 /* Note that the loop created by arm_block_move_unaligned_loop may be
15639 subject to loop unrolling, which makes tuning this condition a little
15642 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
15644 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
15651 arm_gen_cpymemqi (rtx
*operands
)
15653 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
15654 HOST_WIDE_INT srcoffset
, dstoffset
;
15655 rtx src
, dst
, srcbase
, dstbase
;
15656 rtx part_bytes_reg
= NULL
;
15659 if (!CONST_INT_P (operands
[2])
15660 || !CONST_INT_P (operands
[3])
15661 || INTVAL (operands
[2]) > 64)
15664 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
15665 return arm_cpymemqi_unaligned (operands
);
15667 if (INTVAL (operands
[3]) & 3)
15670 dstbase
= operands
[0];
15671 srcbase
= operands
[1];
15673 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
15674 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
15676 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
15677 out_words_to_go
= INTVAL (operands
[2]) / 4;
15678 last_bytes
= INTVAL (operands
[2]) & 3;
15679 dstoffset
= srcoffset
= 0;
15681 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
15682 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
15684 while (in_words_to_go
>= 2)
15686 if (in_words_to_go
> 4)
15687 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
15688 TRUE
, srcbase
, &srcoffset
));
15690 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
15691 src
, FALSE
, srcbase
,
15694 if (out_words_to_go
)
15696 if (out_words_to_go
> 4)
15697 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
15698 TRUE
, dstbase
, &dstoffset
));
15699 else if (out_words_to_go
!= 1)
15700 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
15701 out_words_to_go
, dst
,
15704 dstbase
, &dstoffset
));
15707 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15708 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
15709 if (last_bytes
!= 0)
15711 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
15717 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
15718 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
15721 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15722 if (out_words_to_go
)
15726 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15727 sreg
= copy_to_reg (mem
);
15729 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15730 emit_move_insn (mem
, sreg
);
15733 gcc_assert (!in_words_to_go
); /* Sanity check */
15736 if (in_words_to_go
)
15738 gcc_assert (in_words_to_go
> 0);
15740 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15741 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
15744 gcc_assert (!last_bytes
|| part_bytes_reg
);
15746 if (BYTES_BIG_ENDIAN
&& last_bytes
)
15748 rtx tmp
= gen_reg_rtx (SImode
);
15750 /* The bytes we want are in the top end of the word. */
15751 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
15752 GEN_INT (8 * (4 - last_bytes
))));
15753 part_bytes_reg
= tmp
;
15757 mem
= adjust_automodify_address (dstbase
, QImode
,
15758 plus_constant (Pmode
, dst
,
15760 dstoffset
+ last_bytes
- 1);
15761 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15765 tmp
= gen_reg_rtx (SImode
);
15766 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
15767 part_bytes_reg
= tmp
;
15774 if (last_bytes
> 1)
15776 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
15777 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
15781 rtx tmp
= gen_reg_rtx (SImode
);
15782 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
15783 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
15784 part_bytes_reg
= tmp
;
15791 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
15792 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15799 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15802 next_consecutive_mem (rtx mem
)
15804 machine_mode mode
= GET_MODE (mem
);
15805 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
15806 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
15808 return adjust_automodify_address (mem
, mode
, addr
, offset
);
15811 /* Copy using LDRD/STRD instructions whenever possible.
15812 Returns true upon success. */
15814 gen_cpymem_ldrd_strd (rtx
*operands
)
15816 unsigned HOST_WIDE_INT len
;
15817 HOST_WIDE_INT align
;
15818 rtx src
, dst
, base
;
15820 bool src_aligned
, dst_aligned
;
15821 bool src_volatile
, dst_volatile
;
15823 gcc_assert (CONST_INT_P (operands
[2]));
15824 gcc_assert (CONST_INT_P (operands
[3]));
15826 len
= UINTVAL (operands
[2]);
15830 /* Maximum alignment we can assume for both src and dst buffers. */
15831 align
= INTVAL (operands
[3]);
15833 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
15836 /* Place src and dst addresses in registers
15837 and update the corresponding mem rtx. */
15839 dst_volatile
= MEM_VOLATILE_P (dst
);
15840 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
15841 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
15842 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
15845 src_volatile
= MEM_VOLATILE_P (src
);
15846 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
15847 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
15848 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
15850 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
15853 if (src_volatile
|| dst_volatile
)
15856 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15857 if (!(dst_aligned
|| src_aligned
))
15858 return arm_gen_cpymemqi (operands
);
15860 /* If the either src or dst is unaligned we'll be accessing it as pairs
15861 of unaligned SImode accesses. Otherwise we can generate DImode
15862 ldrd/strd instructions. */
15863 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
15864 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
15869 reg0
= gen_reg_rtx (DImode
);
15870 rtx first_reg
= NULL_RTX
;
15871 rtx second_reg
= NULL_RTX
;
15873 if (!src_aligned
|| !dst_aligned
)
15875 if (BYTES_BIG_ENDIAN
)
15877 second_reg
= gen_lowpart (SImode
, reg0
);
15878 first_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15882 first_reg
= gen_lowpart (SImode
, reg0
);
15883 second_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15886 if (MEM_ALIGN (src
) >= 2 * BITS_PER_WORD
)
15887 emit_move_insn (reg0
, src
);
15888 else if (src_aligned
)
15889 emit_insn (gen_unaligned_loaddi (reg0
, src
));
15892 emit_insn (gen_unaligned_loadsi (first_reg
, src
));
15893 src
= next_consecutive_mem (src
);
15894 emit_insn (gen_unaligned_loadsi (second_reg
, src
));
15897 if (MEM_ALIGN (dst
) >= 2 * BITS_PER_WORD
)
15898 emit_move_insn (dst
, reg0
);
15899 else if (dst_aligned
)
15900 emit_insn (gen_unaligned_storedi (dst
, reg0
));
15903 emit_insn (gen_unaligned_storesi (dst
, first_reg
));
15904 dst
= next_consecutive_mem (dst
);
15905 emit_insn (gen_unaligned_storesi (dst
, second_reg
));
15908 src
= next_consecutive_mem (src
);
15909 dst
= next_consecutive_mem (dst
);
15912 gcc_assert (len
< 8);
15915 /* More than a word but less than a double-word to copy. Copy a word. */
15916 reg0
= gen_reg_rtx (SImode
);
15917 src
= adjust_address (src
, SImode
, 0);
15918 dst
= adjust_address (dst
, SImode
, 0);
15920 emit_move_insn (reg0
, src
);
15922 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15925 emit_move_insn (dst
, reg0
);
15927 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15929 src
= next_consecutive_mem (src
);
15930 dst
= next_consecutive_mem (dst
);
15937 /* Copy the remaining bytes. */
15940 dst
= adjust_address (dst
, HImode
, 0);
15941 src
= adjust_address (src
, HImode
, 0);
15942 reg0
= gen_reg_rtx (SImode
);
15944 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15946 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15949 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15951 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15953 src
= next_consecutive_mem (src
);
15954 dst
= next_consecutive_mem (dst
);
15959 dst
= adjust_address (dst
, QImode
, 0);
15960 src
= adjust_address (src
, QImode
, 0);
15961 reg0
= gen_reg_rtx (QImode
);
15962 emit_move_insn (reg0
, src
);
15963 emit_move_insn (dst
, reg0
);
15967 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15968 into its component 32-bit subregs. OP2 may be an immediate
15969 constant and we want to simplify it in that case. */
15971 arm_decompose_di_binop (rtx op1
, rtx op2
, rtx
*lo_op1
, rtx
*hi_op1
,
15972 rtx
*lo_op2
, rtx
*hi_op2
)
15974 *lo_op1
= gen_lowpart (SImode
, op1
);
15975 *hi_op1
= gen_highpart (SImode
, op1
);
15976 *lo_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15977 subreg_lowpart_offset (SImode
, DImode
));
15978 *hi_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15979 subreg_highpart_offset (SImode
, DImode
));
15982 /* Select a dominance comparison mode if possible for a test of the general
15983 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15984 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15985 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15986 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15987 In all cases OP will be either EQ or NE, but we don't need to know which
15988 here. If we are unable to support a dominance comparison we return
15989 CC mode. This will then fail to match for the RTL expressions that
15990 generate this call. */
15992 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15994 enum rtx_code cond1
, cond2
;
15997 /* Currently we will probably get the wrong result if the individual
15998 comparisons are not simple. This also ensures that it is safe to
15999 reverse a comparison if necessary. */
16000 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
16002 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
16006 /* The if_then_else variant of this tests the second condition if the
16007 first passes, but is true if the first fails. Reverse the first
16008 condition to get a true "inclusive-or" expression. */
16009 if (cond_or
== DOM_CC_NX_OR_Y
)
16010 cond1
= reverse_condition (cond1
);
16012 /* If the comparisons are not equal, and one doesn't dominate the other,
16013 then we can't do this. */
16015 && !comparison_dominates_p (cond1
, cond2
)
16016 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
16020 std::swap (cond1
, cond2
);
16025 if (cond_or
== DOM_CC_X_AND_Y
)
16030 case EQ
: return CC_DEQmode
;
16031 case LE
: return CC_DLEmode
;
16032 case LEU
: return CC_DLEUmode
;
16033 case GE
: return CC_DGEmode
;
16034 case GEU
: return CC_DGEUmode
;
16035 default: gcc_unreachable ();
16039 if (cond_or
== DOM_CC_X_AND_Y
)
16051 gcc_unreachable ();
16055 if (cond_or
== DOM_CC_X_AND_Y
)
16067 gcc_unreachable ();
16071 if (cond_or
== DOM_CC_X_AND_Y
)
16072 return CC_DLTUmode
;
16077 return CC_DLTUmode
;
16079 return CC_DLEUmode
;
16083 gcc_unreachable ();
16087 if (cond_or
== DOM_CC_X_AND_Y
)
16088 return CC_DGTUmode
;
16093 return CC_DGTUmode
;
16095 return CC_DGEUmode
;
16099 gcc_unreachable ();
16102 /* The remaining cases only occur when both comparisons are the
16105 gcc_assert (cond1
== cond2
);
16109 gcc_assert (cond1
== cond2
);
16113 gcc_assert (cond1
== cond2
);
16117 gcc_assert (cond1
== cond2
);
16118 return CC_DLEUmode
;
16121 gcc_assert (cond1
== cond2
);
16122 return CC_DGEUmode
;
16125 gcc_unreachable ();
16130 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
16132 /* All floating point compares return CCFP if it is an equality
16133 comparison, and CCFPE otherwise. */
16134 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
16157 gcc_unreachable ();
16161 /* A compare with a shifted operand. Because of canonicalization, the
16162 comparison will have to be swapped when we emit the assembler. */
16163 if (GET_MODE (y
) == SImode
16164 && (REG_P (y
) || (SUBREG_P (y
)))
16165 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16166 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
16167 || GET_CODE (x
) == ROTATERT
))
16170 /* A widened compare of the sum of a value plus a carry against a
16171 constant. This is a representation of RSC. We want to swap the
16172 result of the comparison at output. Not valid if the Z bit is
16174 if (GET_MODE (x
) == DImode
16175 && GET_CODE (x
) == PLUS
16176 && arm_borrow_operation (XEXP (x
, 1), DImode
)
16178 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16179 && (op
== LE
|| op
== GT
))
16180 || (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
16181 && (op
== LEU
|| op
== GTU
))))
16184 /* If X is a constant we want to use CC_RSBmode. This is
16185 non-canonical, but arm_gen_compare_reg uses this to generate the
16186 correct canonical form. */
16187 if (GET_MODE (y
) == SImode
16188 && (REG_P (y
) || SUBREG_P (y
))
16189 && CONST_INT_P (x
))
16192 /* This operation is performed swapped, but since we only rely on the Z
16193 flag we don't need an additional mode. */
16194 if (GET_MODE (y
) == SImode
16195 && (REG_P (y
) || (SUBREG_P (y
)))
16196 && GET_CODE (x
) == NEG
16197 && (op
== EQ
|| op
== NE
))
16200 /* This is a special case that is used by combine to allow a
16201 comparison of a shifted byte load to be split into a zero-extend
16202 followed by a comparison of the shifted integer (only valid for
16203 equalities and unsigned inequalities). */
16204 if (GET_MODE (x
) == SImode
16205 && GET_CODE (x
) == ASHIFT
16206 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
16207 && GET_CODE (XEXP (x
, 0)) == SUBREG
16208 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
16209 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
16210 && (op
== EQ
|| op
== NE
16211 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
16212 && CONST_INT_P (y
))
16215 /* A construct for a conditional compare, if the false arm contains
16216 0, then both conditions must be true, otherwise either condition
16217 must be true. Not all conditions are possible, so CCmode is
16218 returned if it can't be done. */
16219 if (GET_CODE (x
) == IF_THEN_ELSE
16220 && (XEXP (x
, 2) == const0_rtx
16221 || XEXP (x
, 2) == const1_rtx
)
16222 && COMPARISON_P (XEXP (x
, 0))
16223 && COMPARISON_P (XEXP (x
, 1)))
16224 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16225 INTVAL (XEXP (x
, 2)));
16227 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16228 if (GET_CODE (x
) == AND
16229 && (op
== EQ
|| op
== NE
)
16230 && COMPARISON_P (XEXP (x
, 0))
16231 && COMPARISON_P (XEXP (x
, 1)))
16232 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16235 if (GET_CODE (x
) == IOR
16236 && (op
== EQ
|| op
== NE
)
16237 && COMPARISON_P (XEXP (x
, 0))
16238 && COMPARISON_P (XEXP (x
, 1)))
16239 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16242 /* An operation (on Thumb) where we want to test for a single bit.
16243 This is done by shifting that bit up into the top bit of a
16244 scratch register; we can then branch on the sign bit. */
16246 && GET_MODE (x
) == SImode
16247 && (op
== EQ
|| op
== NE
)
16248 && GET_CODE (x
) == ZERO_EXTRACT
16249 && XEXP (x
, 1) == const1_rtx
)
16252 /* An operation that sets the condition codes as a side-effect, the
16253 V flag is not set correctly, so we can only use comparisons where
16254 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16256 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16257 if (GET_MODE (x
) == SImode
16259 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
16260 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
16261 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
16262 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
16263 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
16264 || GET_CODE (x
) == LSHIFTRT
16265 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16266 || GET_CODE (x
) == ROTATERT
16267 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
16270 /* A comparison of ~reg with a const is really a special
16271 canoncialization of compare (~const, reg), which is a reverse
16272 subtract operation. We may not get here if CONST is 0, but that
16273 doesn't matter because ~0 isn't a valid immediate for RSB. */
16274 if (GET_MODE (x
) == SImode
16275 && GET_CODE (x
) == NOT
16276 && CONST_INT_P (y
))
16279 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
16282 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
16283 && GET_CODE (x
) == PLUS
16284 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
16287 if (GET_MODE (x
) == DImode
16288 && GET_CODE (x
) == PLUS
16289 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
16291 && UINTVAL (y
) == 0x800000000
16292 && (op
== GEU
|| op
== LTU
))
16295 if (GET_MODE (x
) == DImode
16296 && (op
== GE
|| op
== LT
)
16297 && GET_CODE (x
) == SIGN_EXTEND
16298 && ((GET_CODE (y
) == PLUS
16299 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16300 || arm_borrow_operation (y
, DImode
)))
16303 if (GET_MODE (x
) == DImode
16304 && (op
== GEU
|| op
== LTU
)
16305 && GET_CODE (x
) == ZERO_EXTEND
16306 && ((GET_CODE (y
) == PLUS
16307 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16308 || arm_borrow_operation (y
, DImode
)))
16311 if (GET_MODE (x
) == DImode
16312 && (op
== EQ
|| op
== NE
)
16313 && (GET_CODE (x
) == PLUS
16314 || GET_CODE (x
) == MINUS
)
16315 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16316 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
16317 && GET_CODE (y
) == SIGN_EXTEND
16318 && GET_CODE (XEXP (y
, 0)) == GET_CODE (x
))
16321 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
16322 return GET_MODE (x
);
16327 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16328 the sequence of instructions needed to generate a suitable condition
16329 code register. Return the CC register result. */
16331 arm_gen_dicompare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16336 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16337 gcc_assert (TARGET_32BIT
);
16338 gcc_assert (!CONST_INT_P (x
));
16340 rtx x_lo
= simplify_gen_subreg (SImode
, x
, DImode
,
16341 subreg_lowpart_offset (SImode
, DImode
));
16342 rtx x_hi
= simplify_gen_subreg (SImode
, x
, DImode
,
16343 subreg_highpart_offset (SImode
, DImode
));
16344 rtx y_lo
= simplify_gen_subreg (SImode
, y
, DImode
,
16345 subreg_lowpart_offset (SImode
, DImode
));
16346 rtx y_hi
= simplify_gen_subreg (SImode
, y
, DImode
,
16347 subreg_highpart_offset (SImode
, DImode
));
16353 if (y_lo
== const0_rtx
|| y_hi
== const0_rtx
)
16355 if (y_lo
!= const0_rtx
)
16357 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16359 gcc_assert (y_hi
== const0_rtx
);
16360 y_lo
= gen_int_mode (-INTVAL (y_lo
), SImode
);
16361 if (!arm_add_operand (y_lo
, SImode
))
16362 y_lo
= force_reg (SImode
, y_lo
);
16363 emit_insn (gen_addsi3 (scratch2
, x_lo
, y_lo
));
16366 else if (y_hi
!= const0_rtx
)
16368 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16370 y_hi
= gen_int_mode (-INTVAL (y_hi
), SImode
);
16371 if (!arm_add_operand (y_hi
, SImode
))
16372 y_hi
= force_reg (SImode
, y_hi
);
16373 emit_insn (gen_addsi3 (scratch2
, x_hi
, y_hi
));
16379 gcc_assert (!reload_completed
);
16380 scratch
= gen_rtx_SCRATCH (SImode
);
16383 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
16384 cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
16387 = gen_rtx_SET (cc_reg
,
16388 gen_rtx_COMPARE (CC_NZmode
,
16389 gen_rtx_IOR (SImode
, x_lo
, x_hi
),
16391 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
,
16396 if (!arm_add_operand (y_lo
, SImode
))
16397 y_lo
= force_reg (SImode
, y_lo
);
16399 if (!arm_add_operand (y_hi
, SImode
))
16400 y_hi
= force_reg (SImode
, y_hi
);
16402 rtx cmp1
= gen_rtx_NE (SImode
, x_lo
, y_lo
);
16403 rtx cmp2
= gen_rtx_NE (SImode
, x_hi
, y_hi
);
16404 rtx conjunction
= gen_rtx_IOR (SImode
, cmp1
, cmp2
);
16405 mode
= SELECT_CC_MODE (code
, conjunction
, const0_rtx
);
16406 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16408 emit_insn (gen_rtx_SET (cc_reg
,
16409 gen_rtx_COMPARE (mode
, conjunction
,
16417 if (y_lo
== const0_rtx
)
16419 /* If the low word of y is 0, then this is simply a normal
16420 compare of the upper words. */
16421 if (!arm_add_operand (y_hi
, SImode
))
16422 y_hi
= force_reg (SImode
, y_hi
);
16424 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16427 if (!arm_add_operand (y_lo
, SImode
))
16428 y_lo
= force_reg (SImode
, y_lo
);
16431 = gen_rtx_LTU (DImode
,
16432 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16436 scratch
= gen_rtx_SCRATCH (SImode
);
16438 if (!arm_not_operand (y_hi
, SImode
))
16439 y_hi
= force_reg (SImode
, y_hi
);
16442 if (y_hi
== const0_rtx
)
16443 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch
, x_hi
,
16445 else if (CONST_INT_P (y_hi
))
16446 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch
, x_hi
,
16449 insn
= emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch
, x_hi
, y_hi
,
16451 return SET_DEST (single_set (insn
));
16457 /* During expansion, we only expect to get here if y is a
16458 constant that we want to handle, otherwise we should have
16459 swapped the operands already. */
16460 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16462 if (!const_ok_for_arm (INTVAL (y_lo
)))
16463 y_lo
= force_reg (SImode
, y_lo
);
16465 /* Perform a reverse subtract and compare. */
16467 = gen_rtx_LTU (DImode
,
16468 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16470 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_NVout_scratch (scratch
, y_hi
,
16472 return SET_DEST (single_set (insn
));
16478 if (y_lo
== const0_rtx
)
16480 /* If the low word of y is 0, then this is simply a normal
16481 compare of the upper words. */
16482 if (!arm_add_operand (y_hi
, SImode
))
16483 y_hi
= force_reg (SImode
, y_hi
);
16485 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16488 if (!arm_add_operand (y_lo
, SImode
))
16489 y_lo
= force_reg (SImode
, y_lo
);
16492 = gen_rtx_LTU (DImode
,
16493 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16497 scratch
= gen_rtx_SCRATCH (SImode
);
16498 if (!arm_not_operand (y_hi
, SImode
))
16499 y_hi
= force_reg (SImode
, y_hi
);
16502 if (y_hi
== const0_rtx
)
16503 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch
, x_hi
,
16505 else if (CONST_INT_P (y_hi
))
16507 /* Constant is viewed as unsigned when zero-extended. */
16508 y_hi
= GEN_INT (UINTVAL (y_hi
) & 0xffffffffULL
);
16509 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch
, x_hi
,
16513 insn
= emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch
, x_hi
, y_hi
,
16515 return SET_DEST (single_set (insn
));
16521 /* During expansion, we only expect to get here if y is a
16522 constant that we want to handle, otherwise we should have
16523 swapped the operands already. */
16524 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16526 if (!const_ok_for_arm (INTVAL (y_lo
)))
16527 y_lo
= force_reg (SImode
, y_lo
);
16529 /* Perform a reverse subtract and compare. */
16531 = gen_rtx_LTU (DImode
,
16532 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16534 y_hi
= GEN_INT (0xffffffff & UINTVAL (y_hi
));
16535 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_Bout_scratch (scratch
, y_hi
,
16537 return SET_DEST (single_set (insn
));
16541 gcc_unreachable ();
16545 /* X and Y are two things to compare using CODE. Emit the compare insn and
16546 return the rtx for register 0 in the proper mode. */
16548 arm_gen_compare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16550 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
16551 return arm_gen_dicompare_reg (code
, x
, y
, scratch
);
16553 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
16554 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16555 if (mode
== CC_RSBmode
)
16558 scratch
= gen_rtx_SCRATCH (SImode
);
16559 emit_insn (gen_rsb_imm_compare_scratch (scratch
,
16560 GEN_INT (~UINTVAL (x
)), y
));
16563 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
16568 /* Generate a sequence of insns that will generate the correct return
16569 address mask depending on the physical architecture that the program
16572 arm_gen_return_addr_mask (void)
16574 rtx reg
= gen_reg_rtx (Pmode
);
16576 emit_insn (gen_return_addr_mask (reg
));
16581 arm_reload_in_hi (rtx
*operands
)
16583 rtx ref
= operands
[1];
16585 HOST_WIDE_INT offset
= 0;
16587 if (SUBREG_P (ref
))
16589 offset
= SUBREG_BYTE (ref
);
16590 ref
= SUBREG_REG (ref
);
16595 /* We have a pseudo which has been spilt onto the stack; there
16596 are two cases here: the first where there is a simple
16597 stack-slot replacement and a second where the stack-slot is
16598 out of range, or is used as a subreg. */
16599 if (reg_equiv_mem (REGNO (ref
)))
16601 ref
= reg_equiv_mem (REGNO (ref
));
16602 base
= find_replacement (&XEXP (ref
, 0));
16605 /* The slot is out of range, or was dressed up in a SUBREG. */
16606 base
= reg_equiv_address (REGNO (ref
));
16608 /* PR 62554: If there is no equivalent memory location then just move
16609 the value as an SImode register move. This happens when the target
16610 architecture variant does not have an HImode register move. */
16613 gcc_assert (REG_P (operands
[0]));
16614 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16615 gen_rtx_SUBREG (SImode
, ref
, 0)));
16620 base
= find_replacement (&XEXP (ref
, 0));
16622 /* Handle the case where the address is too complex to be offset by 1. */
16623 if (GET_CODE (base
) == MINUS
16624 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16626 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16628 emit_set_insn (base_plus
, base
);
16631 else if (GET_CODE (base
) == PLUS
)
16633 /* The addend must be CONST_INT, or we would have dealt with it above. */
16634 HOST_WIDE_INT hi
, lo
;
16636 offset
+= INTVAL (XEXP (base
, 1));
16637 base
= XEXP (base
, 0);
16639 /* Rework the address into a legal sequence of insns. */
16640 /* Valid range for lo is -4095 -> 4095 */
16643 : -((-offset
) & 0xfff));
16645 /* Corner case, if lo is the max offset then we would be out of range
16646 once we have added the additional 1 below, so bump the msb into the
16647 pre-loading insn(s). */
16651 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16652 ^ (HOST_WIDE_INT
) 0x80000000)
16653 - (HOST_WIDE_INT
) 0x80000000);
16655 gcc_assert (hi
+ lo
== offset
);
16659 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16661 /* Get the base address; addsi3 knows how to handle constants
16662 that require more than one insn. */
16663 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16669 /* Operands[2] may overlap operands[0] (though it won't overlap
16670 operands[1]), that's why we asked for a DImode reg -- so we can
16671 use the bit that does not overlap. */
16672 if (REGNO (operands
[2]) == REGNO (operands
[0]))
16673 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16675 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16677 emit_insn (gen_zero_extendqisi2 (scratch
,
16678 gen_rtx_MEM (QImode
,
16679 plus_constant (Pmode
, base
,
16681 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16682 gen_rtx_MEM (QImode
,
16683 plus_constant (Pmode
, base
,
16685 if (!BYTES_BIG_ENDIAN
)
16686 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16687 gen_rtx_IOR (SImode
,
16690 gen_rtx_SUBREG (SImode
, operands
[0], 0),
16694 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16695 gen_rtx_IOR (SImode
,
16696 gen_rtx_ASHIFT (SImode
, scratch
,
16698 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
16701 /* Handle storing a half-word to memory during reload by synthesizing as two
16702 byte stores. Take care not to clobber the input values until after we
16703 have moved them somewhere safe. This code assumes that if the DImode
16704 scratch in operands[2] overlaps either the input value or output address
16705 in some way, then that value must die in this insn (we absolutely need
16706 two scratch registers for some corner cases). */
16708 arm_reload_out_hi (rtx
*operands
)
16710 rtx ref
= operands
[0];
16711 rtx outval
= operands
[1];
16713 HOST_WIDE_INT offset
= 0;
16715 if (SUBREG_P (ref
))
16717 offset
= SUBREG_BYTE (ref
);
16718 ref
= SUBREG_REG (ref
);
16723 /* We have a pseudo which has been spilt onto the stack; there
16724 are two cases here: the first where there is a simple
16725 stack-slot replacement and a second where the stack-slot is
16726 out of range, or is used as a subreg. */
16727 if (reg_equiv_mem (REGNO (ref
)))
16729 ref
= reg_equiv_mem (REGNO (ref
));
16730 base
= find_replacement (&XEXP (ref
, 0));
16733 /* The slot is out of range, or was dressed up in a SUBREG. */
16734 base
= reg_equiv_address (REGNO (ref
));
16736 /* PR 62254: If there is no equivalent memory location then just move
16737 the value as an SImode register move. This happens when the target
16738 architecture variant does not have an HImode register move. */
16741 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
16743 if (REG_P (outval
))
16745 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16746 gen_rtx_SUBREG (SImode
, outval
, 0)));
16748 else /* SUBREG_P (outval) */
16750 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
16751 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16752 SUBREG_REG (outval
)));
16754 /* FIXME: Handle other cases ? */
16755 gcc_unreachable ();
16761 base
= find_replacement (&XEXP (ref
, 0));
16763 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16765 /* Handle the case where the address is too complex to be offset by 1. */
16766 if (GET_CODE (base
) == MINUS
16767 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16769 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16771 /* Be careful not to destroy OUTVAL. */
16772 if (reg_overlap_mentioned_p (base_plus
, outval
))
16774 /* Updating base_plus might destroy outval, see if we can
16775 swap the scratch and base_plus. */
16776 if (!reg_overlap_mentioned_p (scratch
, outval
))
16777 std::swap (scratch
, base_plus
);
16780 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16782 /* Be conservative and copy OUTVAL into the scratch now,
16783 this should only be necessary if outval is a subreg
16784 of something larger than a word. */
16785 /* XXX Might this clobber base? I can't see how it can,
16786 since scratch is known to overlap with OUTVAL, and
16787 must be wider than a word. */
16788 emit_insn (gen_movhi (scratch_hi
, outval
));
16789 outval
= scratch_hi
;
16793 emit_set_insn (base_plus
, base
);
16796 else if (GET_CODE (base
) == PLUS
)
16798 /* The addend must be CONST_INT, or we would have dealt with it above. */
16799 HOST_WIDE_INT hi
, lo
;
16801 offset
+= INTVAL (XEXP (base
, 1));
16802 base
= XEXP (base
, 0);
16804 /* Rework the address into a legal sequence of insns. */
16805 /* Valid range for lo is -4095 -> 4095 */
16808 : -((-offset
) & 0xfff));
16810 /* Corner case, if lo is the max offset then we would be out of range
16811 once we have added the additional 1 below, so bump the msb into the
16812 pre-loading insn(s). */
16816 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16817 ^ (HOST_WIDE_INT
) 0x80000000)
16818 - (HOST_WIDE_INT
) 0x80000000);
16820 gcc_assert (hi
+ lo
== offset
);
16824 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16826 /* Be careful not to destroy OUTVAL. */
16827 if (reg_overlap_mentioned_p (base_plus
, outval
))
16829 /* Updating base_plus might destroy outval, see if we
16830 can swap the scratch and base_plus. */
16831 if (!reg_overlap_mentioned_p (scratch
, outval
))
16832 std::swap (scratch
, base_plus
);
16835 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16837 /* Be conservative and copy outval into scratch now,
16838 this should only be necessary if outval is a
16839 subreg of something larger than a word. */
16840 /* XXX Might this clobber base? I can't see how it
16841 can, since scratch is known to overlap with
16843 emit_insn (gen_movhi (scratch_hi
, outval
));
16844 outval
= scratch_hi
;
16848 /* Get the base address; addsi3 knows how to handle constants
16849 that require more than one insn. */
16850 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16856 if (BYTES_BIG_ENDIAN
)
16858 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16859 plus_constant (Pmode
, base
,
16861 gen_lowpart (QImode
, outval
)));
16862 emit_insn (gen_lshrsi3 (scratch
,
16863 gen_rtx_SUBREG (SImode
, outval
, 0),
16865 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16867 gen_lowpart (QImode
, scratch
)));
16871 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16873 gen_lowpart (QImode
, outval
)));
16874 emit_insn (gen_lshrsi3 (scratch
,
16875 gen_rtx_SUBREG (SImode
, outval
, 0),
16877 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16878 plus_constant (Pmode
, base
,
16880 gen_lowpart (QImode
, scratch
)));
16884 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16885 (padded to the size of a word) should be passed in a register. */
16888 arm_must_pass_in_stack (const function_arg_info
&arg
)
16890 if (TARGET_AAPCS_BASED
)
16891 return must_pass_in_stack_var_size (arg
);
16893 return must_pass_in_stack_var_size_or_pad (arg
);
16897 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16898 byte of a stack argument has useful data. For legacy APCS ABIs we use
16899 the default. For AAPCS based ABIs small aggregate types are placed
16900 in the lowest memory address. */
16902 static pad_direction
16903 arm_function_arg_padding (machine_mode mode
, const_tree type
)
16905 if (!TARGET_AAPCS_BASED
)
16906 return default_function_arg_padding (mode
, type
);
16908 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
16909 return PAD_DOWNWARD
;
16915 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16916 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16917 register has useful data, and return the opposite if the most
16918 significant byte does. */
16921 arm_pad_reg_upward (machine_mode mode
,
16922 tree type
, int first ATTRIBUTE_UNUSED
)
16924 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
16926 /* For AAPCS, small aggregates, small fixed-point types,
16927 and small complex types are always padded upwards. */
16930 if ((AGGREGATE_TYPE_P (type
)
16931 || TREE_CODE (type
) == COMPLEX_TYPE
16932 || FIXED_POINT_TYPE_P (type
))
16933 && int_size_in_bytes (type
) <= 4)
16938 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
16939 && GET_MODE_SIZE (mode
) <= 4)
16944 /* Otherwise, use default padding. */
16945 return !BYTES_BIG_ENDIAN
;
16948 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16949 assuming that the address in the base register is word aligned. */
16951 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
16953 HOST_WIDE_INT max_offset
;
16955 /* Offset must be a multiple of 4 in Thumb mode. */
16956 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
16961 else if (TARGET_ARM
)
16966 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
16969 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16970 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16971 Assumes that the address in the base register RN is word aligned. Pattern
16972 guarantees that both memory accesses use the same base register,
16973 the offsets are constants within the range, and the gap between the offsets is 4.
16974 If preload complete then check that registers are legal. WBACK indicates whether
16975 address is updated. LOAD indicates whether memory access is load or store. */
16977 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
16978 bool wback
, bool load
)
16980 unsigned int t
, t2
, n
;
16982 if (!reload_completed
)
16985 if (!offset_ok_for_ldrd_strd (offset
))
16992 if ((TARGET_THUMB2
)
16993 && ((wback
&& (n
== t
|| n
== t2
))
16994 || (t
== SP_REGNUM
)
16995 || (t
== PC_REGNUM
)
16996 || (t2
== SP_REGNUM
)
16997 || (t2
== PC_REGNUM
)
16998 || (!load
&& (n
== PC_REGNUM
))
16999 || (load
&& (t
== t2
))
17000 /* Triggers Cortex-M3 LDRD errata. */
17001 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
17005 && ((wback
&& (n
== t
|| n
== t2
))
17006 || (t2
== PC_REGNUM
)
17007 || (t
% 2 != 0) /* First destination register is not even. */
17009 /* PC can be used as base register (for offset addressing only),
17010 but it is depricated. */
17011 || (n
== PC_REGNUM
)))
17017 /* Return true if a 64-bit access with alignment ALIGN and with a
17018 constant offset OFFSET from the base pointer is permitted on this
17021 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
17023 return (unaligned_access
17024 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
17025 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
17028 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17029 operand MEM's address contains an immediate offset from the base
17030 register and has no side effects, in which case it sets BASE,
17031 OFFSET and ALIGN accordingly. */
17033 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
17037 gcc_assert (base
!= NULL
&& offset
!= NULL
);
17039 /* TODO: Handle more general memory operand patterns, such as
17040 PRE_DEC and PRE_INC. */
17042 if (side_effects_p (mem
))
17045 /* Can't deal with subregs. */
17046 if (SUBREG_P (mem
))
17049 gcc_assert (MEM_P (mem
));
17051 *offset
= const0_rtx
;
17052 *align
= MEM_ALIGN (mem
);
17054 addr
= XEXP (mem
, 0);
17056 /* If addr isn't valid for DImode, then we can't handle it. */
17057 if (!arm_legitimate_address_p (DImode
, addr
,
17058 reload_in_progress
|| reload_completed
))
17066 else if (GET_CODE (addr
) == PLUS
)
17068 *base
= XEXP (addr
, 0);
17069 *offset
= XEXP (addr
, 1);
17070 return (REG_P (*base
) && CONST_INT_P (*offset
));
17076 /* Called from a peephole2 to replace two word-size accesses with a
17077 single LDRD/STRD instruction. Returns true iff we can generate a
17078 new instruction sequence. That is, both accesses use the same base
17079 register and the gap between constant offsets is 4. This function
17080 may reorder its operands to match ldrd/strd RTL templates.
17081 OPERANDS are the operands found by the peephole matcher;
17082 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17083 corresponding memory operands. LOAD indicaates whether the access
17084 is load or store. CONST_STORE indicates a store of constant
17085 integer values held in OPERANDS[4,5] and assumes that the pattern
17086 is of length 4 insn, for the purpose of checking dead registers.
17087 COMMUTE indicates that register operands may be reordered. */
17089 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
17090 bool const_store
, bool commute
)
17093 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17094 rtx base
= NULL_RTX
;
17095 rtx cur_base
, cur_offset
, tmp
;
17097 HARD_REG_SET regset
;
17099 gcc_assert (!const_store
|| !load
);
17100 /* Check that the memory references are immediate offsets from the
17101 same base register. Extract the base register, the destination
17102 registers, and the corresponding memory offsets. */
17103 for (i
= 0; i
< nops
; i
++)
17105 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17111 else if (REGNO (base
) != REGNO (cur_base
))
17114 offsets
[i
] = INTVAL (cur_offset
);
17115 if (GET_CODE (operands
[i
]) == SUBREG
)
17117 tmp
= SUBREG_REG (operands
[i
]);
17118 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
17123 /* Make sure there is no dependency between the individual loads. */
17124 if (load
&& REGNO (operands
[0]) == REGNO (base
))
17125 return false; /* RAW */
17127 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
17128 return false; /* WAW */
17130 /* If the same input register is used in both stores
17131 when storing different constants, try to find a free register.
17132 For example, the code
17137 can be transformed into
17141 in Thumb mode assuming that r1 is free.
17142 For ARM mode do the same but only if the starting register
17143 can be made to be even. */
17145 && REGNO (operands
[0]) == REGNO (operands
[1])
17146 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
17150 CLEAR_HARD_REG_SET (regset
);
17151 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17152 if (tmp
== NULL_RTX
)
17155 /* Use the new register in the first load to ensure that
17156 if the original input register is not dead after peephole,
17157 then it will have the correct constant value. */
17160 else if (TARGET_ARM
)
17162 int regno
= REGNO (operands
[0]);
17163 if (!peep2_reg_dead_p (4, operands
[0]))
17165 /* When the input register is even and is not dead after the
17166 pattern, it has to hold the second constant but we cannot
17167 form a legal STRD in ARM mode with this register as the second
17169 if (regno
% 2 == 0)
17172 /* Is regno-1 free? */
17173 SET_HARD_REG_SET (regset
);
17174 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
17175 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17176 if (tmp
== NULL_RTX
)
17183 /* Find a DImode register. */
17184 CLEAR_HARD_REG_SET (regset
);
17185 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17186 if (tmp
!= NULL_RTX
)
17188 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17189 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17193 /* Can we use the input register to form a DI register? */
17194 SET_HARD_REG_SET (regset
);
17195 CLEAR_HARD_REG_BIT(regset
,
17196 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
17197 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17198 if (tmp
== NULL_RTX
)
17200 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
17204 gcc_assert (operands
[0] != NULL_RTX
);
17205 gcc_assert (operands
[1] != NULL_RTX
);
17206 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17207 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
17211 /* Make sure the instructions are ordered with lower memory access first. */
17212 if (offsets
[0] > offsets
[1])
17214 gap
= offsets
[0] - offsets
[1];
17215 offset
= offsets
[1];
17217 /* Swap the instructions such that lower memory is accessed first. */
17218 std::swap (operands
[0], operands
[1]);
17219 std::swap (operands
[2], operands
[3]);
17220 std::swap (align
[0], align
[1]);
17222 std::swap (operands
[4], operands
[5]);
17226 gap
= offsets
[1] - offsets
[0];
17227 offset
= offsets
[0];
17230 /* Make sure accesses are to consecutive memory locations. */
17231 if (gap
!= GET_MODE_SIZE (SImode
))
17234 if (!align_ok_ldrd_strd (align
[0], offset
))
17237 /* Make sure we generate legal instructions. */
17238 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17242 /* In Thumb state, where registers are almost unconstrained, there
17243 is little hope to fix it. */
17247 if (load
&& commute
)
17249 /* Try reordering registers. */
17250 std::swap (operands
[0], operands
[1]);
17251 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17258 /* If input registers are dead after this pattern, they can be
17259 reordered or replaced by other registers that are free in the
17260 current pattern. */
17261 if (!peep2_reg_dead_p (4, operands
[0])
17262 || !peep2_reg_dead_p (4, operands
[1]))
17265 /* Try to reorder the input registers. */
17266 /* For example, the code
17271 can be transformed into
17276 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
17279 std::swap (operands
[0], operands
[1]);
17283 /* Try to find a free DI register. */
17284 CLEAR_HARD_REG_SET (regset
);
17285 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
17286 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
17289 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17290 if (tmp
== NULL_RTX
)
17293 /* DREG must be an even-numbered register in DImode.
17294 Split it into SI registers. */
17295 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17296 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17297 gcc_assert (operands
[0] != NULL_RTX
);
17298 gcc_assert (operands
[1] != NULL_RTX
);
17299 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17300 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
17302 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
17312 /* Return true if parallel execution of the two word-size accesses provided
17313 could be satisfied with a single LDRD/STRD instruction. Two word-size
17314 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17315 register operands and OPERANDS[2,3] are the corresponding memory operands.
17318 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
17321 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17322 rtx base
= NULL_RTX
;
17323 rtx cur_base
, cur_offset
;
17326 /* Check that the memory references are immediate offsets from the
17327 same base register. Extract the base register, the destination
17328 registers, and the corresponding memory offsets. */
17329 for (i
= 0; i
< nops
; i
++)
17331 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17337 else if (REGNO (base
) != REGNO (cur_base
))
17340 offsets
[i
] = INTVAL (cur_offset
);
17341 if (GET_CODE (operands
[i
]) == SUBREG
)
17345 if (offsets
[0] > offsets
[1])
17348 gap
= offsets
[1] - offsets
[0];
17349 offset
= offsets
[0];
17351 /* Make sure accesses are to consecutive memory locations. */
17352 if (gap
!= GET_MODE_SIZE (SImode
))
17355 if (!align_ok_ldrd_strd (align
[0], offset
))
17358 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17363 /* Print a symbolic form of X to the debug file, F. */
17365 arm_print_value (FILE *f
, rtx x
)
17367 switch (GET_CODE (x
))
17370 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17376 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17377 sizeof (fpstr
), 0, 1);
17387 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
17389 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
17390 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
17398 fprintf (f
, "\"%s\"", XSTR (x
, 0));
17402 fprintf (f
, "`%s'", XSTR (x
, 0));
17406 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
17410 arm_print_value (f
, XEXP (x
, 0));
17414 arm_print_value (f
, XEXP (x
, 0));
17416 arm_print_value (f
, XEXP (x
, 1));
17424 fprintf (f
, "????");
17429 /* Routines for manipulation of the constant pool. */
17431 /* Arm instructions cannot load a large constant directly into a
17432 register; they have to come from a pc relative load. The constant
17433 must therefore be placed in the addressable range of the pc
17434 relative load. Depending on the precise pc relative load
17435 instruction the range is somewhere between 256 bytes and 4k. This
17436 means that we often have to dump a constant inside a function, and
17437 generate code to branch around it.
17439 It is important to minimize this, since the branches will slow
17440 things down and make the code larger.
17442 Normally we can hide the table after an existing unconditional
17443 branch so that there is no interruption of the flow, but in the
17444 worst case the code looks like this:
17462 We fix this by performing a scan after scheduling, which notices
17463 which instructions need to have their operands fetched from the
17464 constant table and builds the table.
17466 The algorithm starts by building a table of all the constants that
17467 need fixing up and all the natural barriers in the function (places
17468 where a constant table can be dropped without breaking the flow).
17469 For each fixup we note how far the pc-relative replacement will be
17470 able to reach and the offset of the instruction into the function.
17472 Having built the table we then group the fixes together to form
17473 tables that are as large as possible (subject to addressing
17474 constraints) and emit each table of constants after the last
17475 barrier that is within range of all the instructions in the group.
17476 If a group does not contain a barrier, then we forcibly create one
17477 by inserting a jump instruction into the flow. Once the table has
17478 been inserted, the insns are then modified to reference the
17479 relevant entry in the pool.
17481 Possible enhancements to the algorithm (not implemented) are:
17483 1) For some processors and object formats, there may be benefit in
17484 aligning the pools to the start of cache lines; this alignment
17485 would need to be taken into account when calculating addressability
17488 /* These typedefs are located at the start of this file, so that
17489 they can be used in the prototypes there. This comment is to
17490 remind readers of that fact so that the following structures
17491 can be understood more easily.
17493 typedef struct minipool_node Mnode;
17494 typedef struct minipool_fixup Mfix; */
17496 struct minipool_node
17498 /* Doubly linked chain of entries. */
17501 /* The maximum offset into the code that this entry can be placed. While
17502 pushing fixes for forward references, all entries are sorted in order
17503 of increasing max_address. */
17504 HOST_WIDE_INT max_address
;
17505 /* Similarly for an entry inserted for a backwards ref. */
17506 HOST_WIDE_INT min_address
;
17507 /* The number of fixes referencing this entry. This can become zero
17508 if we "unpush" an entry. In this case we ignore the entry when we
17509 come to emit the code. */
17511 /* The offset from the start of the minipool. */
17512 HOST_WIDE_INT offset
;
17513 /* The value in table. */
17515 /* The mode of value. */
17517 /* The size of the value. With iWMMXt enabled
17518 sizes > 4 also imply an alignment of 8-bytes. */
17522 struct minipool_fixup
17526 HOST_WIDE_INT address
;
17532 HOST_WIDE_INT forwards
;
17533 HOST_WIDE_INT backwards
;
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17540 static Mnode
* minipool_vector_head
;
17541 static Mnode
* minipool_vector_tail
;
17542 static rtx_code_label
*minipool_vector_label
;
17543 static int minipool_pad
;
17545 /* The linked list of all minipool fixes required for this function. */
17546 Mfix
* minipool_fix_head
;
17547 Mfix
* minipool_fix_tail
;
17548 /* The fix entry for the current minipool, once it has been placed. */
17549 Mfix
* minipool_barrier
;
/* Default: jump tables live in the read-only data section unless the
   target overrides this.  The guard was missing its #endif.  */
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
17555 static HOST_WIDE_INT
17556 get_jump_table_size (rtx_jump_table_data
*insn
)
17558 /* ADDR_VECs only take room if read-only data does into the text
17560 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
17562 rtx body
= PATTERN (insn
);
17563 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
17564 HOST_WIDE_INT size
;
17565 HOST_WIDE_INT modesize
;
17567 modesize
= GET_MODE_SIZE (GET_MODE (body
));
17568 size
= modesize
* XVECLEN (body
, elt
);
17572 /* Round up size of TBB table to a halfword boundary. */
17573 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
17576 /* No padding necessary for TBH. */
17579 /* Add two bytes for alignment on Thumb. */
17584 gcc_unreachable ();
17592 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17593 function descriptor) into a register and the GOT address into the
17594 FDPIC register, returning an rtx for the register holding the
17595 function address. */
17598 arm_load_function_descriptor (rtx funcdesc
)
17600 rtx fnaddr_reg
= gen_reg_rtx (Pmode
);
17601 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
17602 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
17603 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
17605 emit_move_insn (fnaddr_reg
, fnaddr
);
17607 /* The ABI requires the entry point address to be loaded first, but
17608 since we cannot support lazy binding for lack of atomic load of
17609 two 32-bits values, we do not need to bother to prevent the
17610 previous load from being moved after that of the GOT address. */
17611 emit_insn (gen_restore_pic_register_after_call (pic_reg
, gotaddr
));
17616 /* Return the maximum amount of padding that will be inserted before
17618 static HOST_WIDE_INT
17619 get_label_padding (rtx label
)
17621 HOST_WIDE_INT align
, min_insn_size
;
17623 align
= 1 << label_to_alignment (label
).levels
[0].log
;
17624 min_insn_size
= TARGET_THUMB
? 2 : 4;
17625 return align
> min_insn_size
? align
- min_insn_size
: 0;
17628 /* Move a minipool fix MP from its current location to before MAX_MP.
17629 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17630 constraints may need updating. */
17632 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
17633 HOST_WIDE_INT max_address
)
17635 /* The code below assumes these are different. */
17636 gcc_assert (mp
!= max_mp
);
17638 if (max_mp
== NULL
)
17640 if (max_address
< mp
->max_address
)
17641 mp
->max_address
= max_address
;
17645 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17646 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17648 mp
->max_address
= max_address
;
17650 /* Unlink MP from its current position. Since max_mp is non-null,
17651 mp->prev must be non-null. */
17652 mp
->prev
->next
= mp
->next
;
17653 if (mp
->next
!= NULL
)
17654 mp
->next
->prev
= mp
->prev
;
17656 minipool_vector_tail
= mp
->prev
;
17658 /* Re-insert it before MAX_MP. */
17660 mp
->prev
= max_mp
->prev
;
17663 if (mp
->prev
!= NULL
)
17664 mp
->prev
->next
= mp
;
17666 minipool_vector_head
= mp
;
17669 /* Save the new entry. */
17672 /* Scan over the preceding entries and adjust their addresses as
17674 while (mp
->prev
!= NULL
17675 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17677 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17684 /* Add a constant to the minipool for a forward reference. Returns the
17685 node added or NULL if the constant will not fit in this pool. */
17687 add_minipool_forward_ref (Mfix
*fix
)
17689 /* If set, max_mp is the first pool_entry that has a lower
17690 constraint than the one we are trying to add. */
17691 Mnode
* max_mp
= NULL
;
17692 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
17695 /* If the minipool starts before the end of FIX->INSN then this FIX
17696 cannot be placed into the current pool. Furthermore, adding the
17697 new constant pool entry may cause the pool to start FIX_SIZE bytes
17699 if (minipool_vector_head
&&
17700 (fix
->address
+ get_attr_length (fix
->insn
)
17701 >= minipool_vector_head
->max_address
- fix
->fix_size
))
17704 /* Scan the pool to see if a constant with the same value has
17705 already been added. While we are doing this, also note the
17706 location where we must insert the constant if it doesn't already
17708 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17710 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17711 && fix
->mode
== mp
->mode
17712 && (!LABEL_P (fix
->value
)
17713 || (CODE_LABEL_NUMBER (fix
->value
)
17714 == CODE_LABEL_NUMBER (mp
->value
)))
17715 && rtx_equal_p (fix
->value
, mp
->value
))
17717 /* More than one fix references this entry. */
17719 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
17722 /* Note the insertion point if necessary. */
17724 && mp
->max_address
> max_address
)
17727 /* If we are inserting an 8-bytes aligned quantity and
17728 we have not already found an insertion point, then
17729 make sure that all such 8-byte aligned quantities are
17730 placed at the start of the pool. */
17731 if (ARM_DOUBLEWORD_ALIGN
17733 && fix
->fix_size
>= 8
17734 && mp
->fix_size
< 8)
17737 max_address
= mp
->max_address
;
17741 /* The value is not currently in the minipool, so we need to create
17742 a new entry for it. If MAX_MP is NULL, the entry will be put on
17743 the end of the list since the placement is less constrained than
17744 any existing entry. Otherwise, we insert the new fix before
17745 MAX_MP and, if necessary, adjust the constraints on the other
17748 mp
->fix_size
= fix
->fix_size
;
17749 mp
->mode
= fix
->mode
;
17750 mp
->value
= fix
->value
;
17752 /* Not yet required for a backwards ref. */
17753 mp
->min_address
= -65536;
17755 if (max_mp
== NULL
)
17757 mp
->max_address
= max_address
;
17759 mp
->prev
= minipool_vector_tail
;
17761 if (mp
->prev
== NULL
)
17763 minipool_vector_head
= mp
;
17764 minipool_vector_label
= gen_label_rtx ();
17767 mp
->prev
->next
= mp
;
17769 minipool_vector_tail
= mp
;
17773 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17774 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17776 mp
->max_address
= max_address
;
17779 mp
->prev
= max_mp
->prev
;
17781 if (mp
->prev
!= NULL
)
17782 mp
->prev
->next
= mp
;
17784 minipool_vector_head
= mp
;
17787 /* Save the new entry. */
17790 /* Scan over the preceding entries and adjust their addresses as
17792 while (mp
->prev
!= NULL
17793 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17795 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17803 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
17804 HOST_WIDE_INT min_address
)
17806 HOST_WIDE_INT offset
;
17808 /* The code below assumes these are different. */
17809 gcc_assert (mp
!= min_mp
);
17811 if (min_mp
== NULL
)
17813 if (min_address
> mp
->min_address
)
17814 mp
->min_address
= min_address
;
17818 /* We will adjust this below if it is too loose. */
17819 mp
->min_address
= min_address
;
17821 /* Unlink MP from its current position. Since min_mp is non-null,
17822 mp->next must be non-null. */
17823 mp
->next
->prev
= mp
->prev
;
17824 if (mp
->prev
!= NULL
)
17825 mp
->prev
->next
= mp
->next
;
17827 minipool_vector_head
= mp
->next
;
17829 /* Reinsert it after MIN_MP. */
17831 mp
->next
= min_mp
->next
;
17833 if (mp
->next
!= NULL
)
17834 mp
->next
->prev
= mp
;
17836 minipool_vector_tail
= mp
;
17842 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17844 mp
->offset
= offset
;
17845 if (mp
->refcount
> 0)
17846 offset
+= mp
->fix_size
;
17848 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17849 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17855 /* Add a constant to the minipool for a backward reference. Returns the
17856 node added or NULL if the constant will not fit in this pool.
17858 Note that the code for insertion for a backwards reference can be
17859 somewhat confusing because the calculated offsets for each fix do
17860 not take into account the size of the pool (which is still under
17863 add_minipool_backward_ref (Mfix
*fix
)
17865 /* If set, min_mp is the last pool_entry that has a lower constraint
17866 than the one we are trying to add. */
17867 Mnode
*min_mp
= NULL
;
17868 /* This can be negative, since it is only a constraint. */
17869 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
17872 /* If we can't reach the current pool from this insn, or if we can't
17873 insert this entry at the end of the pool without pushing other
17874 fixes out of range, then we don't try. This ensures that we
17875 can't fail later on. */
17876 if (min_address
>= minipool_barrier
->address
17877 || (minipool_vector_tail
->min_address
+ fix
->fix_size
17878 >= minipool_barrier
->address
))
17881 /* Scan the pool to see if a constant with the same value has
17882 already been added. While we are doing this, also note the
17883 location where we must insert the constant if it doesn't already
17885 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
17887 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17888 && fix
->mode
== mp
->mode
17889 && (!LABEL_P (fix
->value
)
17890 || (CODE_LABEL_NUMBER (fix
->value
)
17891 == CODE_LABEL_NUMBER (mp
->value
)))
17892 && rtx_equal_p (fix
->value
, mp
->value
)
17893 /* Check that there is enough slack to move this entry to the
17894 end of the table (this is conservative). */
17895 && (mp
->max_address
17896 > (minipool_barrier
->address
17897 + minipool_vector_tail
->offset
17898 + minipool_vector_tail
->fix_size
)))
17901 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
17904 if (min_mp
!= NULL
)
17905 mp
->min_address
+= fix
->fix_size
;
17908 /* Note the insertion point if necessary. */
17909 if (mp
->min_address
< min_address
)
17911 /* For now, we do not allow the insertion of 8-byte alignment
17912 requiring nodes anywhere but at the start of the pool. */
17913 if (ARM_DOUBLEWORD_ALIGN
17914 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17919 else if (mp
->max_address
17920 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
17922 /* Inserting before this entry would push the fix beyond
17923 its maximum address (which can happen if we have
17924 re-located a forwards fix); force the new fix to come
17926 if (ARM_DOUBLEWORD_ALIGN
17927 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17932 min_address
= mp
->min_address
+ fix
->fix_size
;
17935 /* Do not insert a non-8-byte aligned quantity before 8-byte
17936 aligned quantities. */
17937 else if (ARM_DOUBLEWORD_ALIGN
17938 && fix
->fix_size
< 8
17939 && mp
->fix_size
>= 8)
17942 min_address
= mp
->min_address
+ fix
->fix_size
;
17947 /* We need to create a new entry. */
17949 mp
->fix_size
= fix
->fix_size
;
17950 mp
->mode
= fix
->mode
;
17951 mp
->value
= fix
->value
;
17953 mp
->max_address
= minipool_barrier
->address
+ 65536;
17955 mp
->min_address
= min_address
;
17957 if (min_mp
== NULL
)
17960 mp
->next
= minipool_vector_head
;
17962 if (mp
->next
== NULL
)
17964 minipool_vector_tail
= mp
;
17965 minipool_vector_label
= gen_label_rtx ();
17968 mp
->next
->prev
= mp
;
17970 minipool_vector_head
= mp
;
17974 mp
->next
= min_mp
->next
;
17978 if (mp
->next
!= NULL
)
17979 mp
->next
->prev
= mp
;
17981 minipool_vector_tail
= mp
;
17984 /* Save the new entry. */
17992 /* Scan over the following entries and adjust their offsets. */
17993 while (mp
->next
!= NULL
)
17995 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17996 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17999 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
18001 mp
->next
->offset
= mp
->offset
;
18010 assign_minipool_offsets (Mfix
*barrier
)
18012 HOST_WIDE_INT offset
= 0;
18015 minipool_barrier
= barrier
;
18017 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18019 mp
->offset
= offset
;
18021 if (mp
->refcount
> 0)
18022 offset
+= mp
->fix_size
;
18026 /* Output the literal table */
18028 dump_minipool (rtx_insn
*scan
)
18034 if (ARM_DOUBLEWORD_ALIGN
)
18035 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
18036 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
18043 fprintf (dump_file
,
18044 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18045 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
18047 scan
= emit_label_after (gen_label_rtx (), scan
);
18048 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
18049 scan
= emit_label_after (minipool_vector_label
, scan
);
18051 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
18053 if (mp
->refcount
> 0)
18057 fprintf (dump_file
,
18058 ";; Offset %u, min %ld, max %ld ",
18059 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
18060 (unsigned long) mp
->max_address
);
18061 arm_print_value (dump_file
, mp
->value
);
18062 fputc ('\n', dump_file
);
18065 rtx val
= copy_rtx (mp
->value
);
18067 switch (GET_MODE_SIZE (mp
->mode
))
18069 #ifdef HAVE_consttable_1
18071 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
18075 #ifdef HAVE_consttable_2
18077 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
18081 #ifdef HAVE_consttable_4
18083 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
18087 #ifdef HAVE_consttable_8
18089 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
18093 #ifdef HAVE_consttable_16
18095 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
18100 gcc_unreachable ();
18108 minipool_vector_head
= minipool_vector_tail
= NULL
;
18109 scan
= emit_insn_after (gen_consttable_end (), scan
);
18110 scan
= emit_barrier_after (scan
);
18113 /* Return the cost of forcibly inserting a barrier after INSN. */
18115 arm_barrier_cost (rtx_insn
*insn
)
18117 /* Basing the location of the pool on the loop depth is preferable,
18118 but at the moment, the basic block information seems to be
18119 corrupt by this stage of the compilation. */
18120 int base_cost
= 50;
18121 rtx_insn
*next
= next_nonnote_insn (insn
);
18123 if (next
!= NULL
&& LABEL_P (next
))
18126 switch (GET_CODE (insn
))
18129 /* It will always be better to place the table before the label, rather
18138 return base_cost
- 10;
18141 return base_cost
+ 10;
18145 /* Find the best place in the insn stream in the range
18146 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18147 Create the barrier by inserting a jump and add a new fix entry for
18150 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
18152 HOST_WIDE_INT count
= 0;
18153 rtx_barrier
*barrier
;
18154 rtx_insn
*from
= fix
->insn
;
18155 /* The instruction after which we will insert the jump. */
18156 rtx_insn
*selected
= NULL
;
18158 /* The address at which the jump instruction will be placed. */
18159 HOST_WIDE_INT selected_address
;
18161 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
18162 rtx_code_label
*label
= gen_label_rtx ();
18164 selected_cost
= arm_barrier_cost (from
);
18165 selected_address
= fix
->address
;
18167 while (from
&& count
< max_count
)
18169 rtx_jump_table_data
*tmp
;
18172 /* This code shouldn't have been called if there was a natural barrier
18174 gcc_assert (!BARRIER_P (from
));
18176 /* Count the length of this insn. This must stay in sync with the
18177 code that pushes minipool fixes. */
18178 if (LABEL_P (from
))
18179 count
+= get_label_padding (from
);
18181 count
+= get_attr_length (from
);
18183 /* If there is a jump table, add its length. */
18184 if (tablejump_p (from
, NULL
, &tmp
))
18186 count
+= get_jump_table_size (tmp
);
18188 /* Jump tables aren't in a basic block, so base the cost on
18189 the dispatch insn. If we select this location, we will
18190 still put the pool after the table. */
18191 new_cost
= arm_barrier_cost (from
);
18193 if (count
< max_count
18194 && (!selected
|| new_cost
<= selected_cost
))
18197 selected_cost
= new_cost
;
18198 selected_address
= fix
->address
+ count
;
18201 /* Continue after the dispatch table. */
18202 from
= NEXT_INSN (tmp
);
18206 new_cost
= arm_barrier_cost (from
);
18208 if (count
< max_count
18209 && (!selected
|| new_cost
<= selected_cost
))
18212 selected_cost
= new_cost
;
18213 selected_address
= fix
->address
+ count
;
18216 from
= NEXT_INSN (from
);
18219 /* Make sure that we found a place to insert the jump. */
18220 gcc_assert (selected
);
18222 /* Create a new JUMP_INSN that branches around a barrier. */
18223 from
= emit_jump_insn_after (gen_jump (label
), selected
);
18224 JUMP_LABEL (from
) = label
;
18225 barrier
= emit_barrier_after (from
);
18226 emit_label_after (label
, barrier
);
18228 /* Create a minipool barrier entry for the new barrier. */
18229 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
18230 new_fix
->insn
= barrier
;
18231 new_fix
->address
= selected_address
;
18232 new_fix
->next
= fix
->next
;
18233 fix
->next
= new_fix
;
18238 /* Record that there is a natural barrier in the insn stream at
18241 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
18243 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18246 fix
->address
= address
;
18249 if (minipool_fix_head
!= NULL
)
18250 minipool_fix_tail
->next
= fix
;
18252 minipool_fix_head
= fix
;
18254 minipool_fix_tail
= fix
;
18257 /* Record INSN, which will need fixing up to load a value from the
18258 minipool. ADDRESS is the offset of the insn since the start of the
18259 function; LOC is a pointer to the part of the insn which requires
18260 fixing; VALUE is the constant that must be loaded, which is of type
18263 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
18264 machine_mode mode
, rtx value
)
18266 gcc_assert (!arm_disable_literal_pool
);
18267 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18270 fix
->address
= address
;
18273 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
18274 fix
->value
= value
;
18275 fix
->forwards
= get_attr_pool_range (insn
);
18276 fix
->backwards
= get_attr_neg_pool_range (insn
);
18277 fix
->minipool
= NULL
;
18279 /* If an insn doesn't have a range defined for it, then it isn't
18280 expecting to be reworked by this code. Better to stop now than
18281 to generate duff assembly code. */
18282 gcc_assert (fix
->forwards
|| fix
->backwards
);
18284 /* If an entry requires 8-byte alignment then assume all constant pools
18285 require 4 bytes of padding. Trying to do this later on a per-pool
18286 basis is awkward because existing pool entries have to be modified. */
18287 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
18292 fprintf (dump_file
,
18293 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18294 GET_MODE_NAME (mode
),
18295 INSN_UID (insn
), (unsigned long) address
,
18296 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
18297 arm_print_value (dump_file
, fix
->value
);
18298 fprintf (dump_file
, "\n");
18301 /* Add it to the chain of fixes. */
18304 if (minipool_fix_head
!= NULL
)
18305 minipool_fix_tail
->next
= fix
;
18307 minipool_fix_head
= fix
;
18309 minipool_fix_tail
= fix
;
18312 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18313 Returns the number of insns needed, or 99 if we always want to synthesize
18316 arm_max_const_double_inline_cost ()
18318 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
18321 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18322 Returns the number of insns needed, or 99 if we don't know how to
18325 arm_const_double_inline_cost (rtx val
)
18327 rtx lowpart
, highpart
;
18330 mode
= GET_MODE (val
);
18332 if (mode
== VOIDmode
)
18335 gcc_assert (GET_MODE_SIZE (mode
) == 8);
18337 lowpart
= gen_lowpart (SImode
, val
);
18338 highpart
= gen_highpart_mode (SImode
, mode
, val
);
18340 gcc_assert (CONST_INT_P (lowpart
));
18341 gcc_assert (CONST_INT_P (highpart
));
18343 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
18344 NULL_RTX
, NULL_RTX
, 0, 0)
18345 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
18346 NULL_RTX
, NULL_RTX
, 0, 0));
18349 /* Cost of loading a SImode constant. */
18351 arm_const_inline_cost (enum rtx_code code
, rtx val
)
18353 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
18354 NULL_RTX
, NULL_RTX
, 1, 0);
18357 /* Return true if it is worthwhile to split a 64-bit constant into two
18358 32-bit operations. This is the case if optimizing for size, or
18359 if we have load delay slots, or if one 32-bit part can be done with
18360 a single data operation. */
18362 arm_const_double_by_parts (rtx val
)
18364 machine_mode mode
= GET_MODE (val
);
18367 if (optimize_size
|| arm_ld_sched
)
18370 if (mode
== VOIDmode
)
18373 part
= gen_highpart_mode (SImode
, mode
, val
);
18375 gcc_assert (CONST_INT_P (part
));
18377 if (const_ok_for_arm (INTVAL (part
))
18378 || const_ok_for_arm (~INTVAL (part
)))
18381 part
= gen_lowpart (SImode
, val
);
18383 gcc_assert (CONST_INT_P (part
));
18385 if (const_ok_for_arm (INTVAL (part
))
18386 || const_ok_for_arm (~INTVAL (part
)))
18392 /* Return true if it is possible to inline both the high and low parts
18393 of a 64-bit constant into 32-bit data processing instructions. */
18395 arm_const_double_by_immediates (rtx val
)
18397 machine_mode mode
= GET_MODE (val
);
18400 if (mode
== VOIDmode
)
18403 part
= gen_highpart_mode (SImode
, mode
, val
);
18405 gcc_assert (CONST_INT_P (part
));
18407 if (!const_ok_for_arm (INTVAL (part
)))
18410 part
= gen_lowpart (SImode
, val
);
18412 gcc_assert (CONST_INT_P (part
));
18414 if (!const_ok_for_arm (INTVAL (part
)))
18420 /* Scan INSN and note any of its operands that need fixing.
18421 If DO_PUSHES is false we do not actually push any of the fixups
18424 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
18428 extract_constrain_insn (insn
);
18430 if (recog_data
.n_alternatives
== 0)
18433 /* Fill in recog_op_alt with information about the constraints of
18435 preprocess_constraints (insn
);
18437 const operand_alternative
*op_alt
= which_op_alt ();
18438 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
18440 /* Things we need to fix can only occur in inputs. */
18441 if (recog_data
.operand_type
[opno
] != OP_IN
)
18444 /* If this alternative is a memory reference, then any mention
18445 of constants in this alternative is really to fool reload
18446 into allowing us to accept one there. We need to fix them up
18447 now so that we output the right code. */
18448 if (op_alt
[opno
].memory_ok
)
18450 rtx op
= recog_data
.operand
[opno
];
18452 if (CONSTANT_P (op
))
18455 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
18456 recog_data
.operand_mode
[opno
], op
);
18458 else if (MEM_P (op
)
18459 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
18460 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
18464 rtx cop
= avoid_constant_pool_reference (op
);
18466 /* Casting the address of something to a mode narrower
18467 than a word can cause avoid_constant_pool_reference()
18468 to return the pool reference itself. That's no good to
18469 us here. Lets just hope that we can use the
18470 constant pool value directly. */
18472 cop
= get_pool_constant (XEXP (op
, 0));
18474 push_minipool_fix (insn
, address
,
18475 recog_data
.operand_loc
[opno
],
18476 recog_data
.operand_mode
[opno
], cop
);
18486 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18487 and unions in the context of ARMv8-M Security Extensions. It is used as a
18488 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18489 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18490 or four masks, depending on whether it is being computed for a
18491 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18492 respectively. The tree for the type of the argument or a field within an
18493 argument is passed in ARG_TYPE, the current register this argument or field
18494 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18495 argument or field starts at is passed in STARTING_BIT and the last used bit
18496 is kept in LAST_USED_BIT which is also updated accordingly. */
18498 static unsigned HOST_WIDE_INT
18499 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
18500 uint32_t * padding_bits_to_clear
,
18501 unsigned starting_bit
, int * last_used_bit
)
18504 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
18506 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
18508 unsigned current_bit
= starting_bit
;
18510 long int offset
, size
;
18513 field
= TYPE_FIELDS (arg_type
);
18516 /* The offset within a structure is always an offset from
18517 the start of that structure. Make sure we take that into the
18518 calculation of the register based offset that we use here. */
18519 offset
= starting_bit
;
18520 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
18523 /* This is the actual size of the field, for bitfields this is the
18524 bitfield width and not the container size. */
18525 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18527 if (*last_used_bit
!= offset
)
18529 if (offset
< *last_used_bit
)
18531 /* This field's offset is before the 'last_used_bit', that
18532 means this field goes on the next register. So we need to
18533 pad the rest of the current register and increase the
18534 register number. */
18536 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
18539 padding_bits_to_clear
[*regno
] |= mask
;
18540 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18545 /* Otherwise we pad the bits between the last field's end and
18546 the start of the new field. */
18549 mask
= ((uint32_t)-1) >> (32 - offset
);
18550 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
18551 padding_bits_to_clear
[*regno
] |= mask
;
18553 current_bit
= offset
;
18556 /* Calculate further padding bits for inner structs/unions too. */
18557 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
18559 *last_used_bit
= current_bit
;
18560 not_to_clear_reg_mask
18561 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
18562 padding_bits_to_clear
, offset
,
18567 /* Update 'current_bit' with this field's size. If the
18568 'current_bit' lies in a subsequent register, update 'regno' and
18569 reset 'current_bit' to point to the current bit in that new
18571 current_bit
+= size
;
18572 while (current_bit
>= 32)
18575 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18578 *last_used_bit
= current_bit
;
18581 field
= TREE_CHAIN (field
);
18583 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18585 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
18587 tree field
, field_t
;
18588 int i
, regno_t
, field_size
;
18592 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
18593 = {-1, -1, -1, -1};
18595 /* To compute the padding bits in a union we only consider bits as
18596 padding bits if they are always either a padding bit or fall outside a
18597 fields size for all fields in the union. */
18598 field
= TYPE_FIELDS (arg_type
);
18601 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
18602 = {0U, 0U, 0U, 0U};
18603 int last_used_bit_t
= *last_used_bit
;
18605 field_t
= TREE_TYPE (field
);
18607 /* If the field's type is either a record or a union make sure to
18608 compute their padding bits too. */
18609 if (RECORD_OR_UNION_TYPE_P (field_t
))
18610 not_to_clear_reg_mask
18611 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
18612 &padding_bits_to_clear_t
[0],
18613 starting_bit
, &last_used_bit_t
);
18616 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18617 regno_t
= (field_size
/ 32) + *regno
;
18618 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
18621 for (i
= *regno
; i
< regno_t
; i
++)
18623 /* For all but the last register used by this field only keep the
18624 padding bits that were padding bits in this field. */
18625 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
18628 /* For the last register, keep all padding bits that were padding
18629 bits in this field and any padding bits that are still valid
18630 as padding bits but fall outside of this field's size. */
18631 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
18632 padding_bits_to_clear_res
[regno_t
]
18633 &= padding_bits_to_clear_t
[regno_t
] | mask
;
18635 /* Update the maximum size of the fields in terms of registers used
18636 ('max_reg') and the 'last_used_bit' in said register. */
18637 if (max_reg
< regno_t
)
18640 max_bit
= last_used_bit_t
;
18642 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
18643 max_bit
= last_used_bit_t
;
18645 field
= TREE_CHAIN (field
);
18648 /* Update the current padding_bits_to_clear using the intersection of the
18649 padding bits of all the fields. */
18650 for (i
=*regno
; i
< max_reg
; i
++)
18651 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
18653 /* Do not keep trailing padding bits, we do not know yet whether this
18654 is the end of the argument. */
18655 mask
= ((uint32_t) 1 << max_bit
) - 1;
18656 padding_bits_to_clear
[max_reg
]
18657 |= padding_bits_to_clear_res
[max_reg
] & mask
;
18660 *last_used_bit
= max_bit
;
18663 /* This function should only be used for structs and unions. */
18664 gcc_unreachable ();
18666 return not_to_clear_reg_mask
;
18669 /* In the context of ARMv8-M Security Extensions, this function is used for both
18670 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18671 registers are used when returning or passing arguments, which is then
18672 returned as a mask. It will also compute a mask to indicate padding/unused
18673 bits for each of these registers, and passes this through the
18674 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18675 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18676 the starting register used to pass this argument or return value is passed
18677 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18678 for struct and union types. */
18680 static unsigned HOST_WIDE_INT
18681 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
18682 uint32_t * padding_bits_to_clear
)
18685 int last_used_bit
= 0;
18686 unsigned HOST_WIDE_INT not_to_clear_mask
;
18688 if (RECORD_OR_UNION_TYPE_P (arg_type
))
18691 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
18692 padding_bits_to_clear
, 0,
18696 /* If the 'last_used_bit' is not zero, that means we are still using a
18697 part of the last 'regno'. In such cases we must clear the trailing
18698 bits. Otherwise we are not using regno and we should mark it as to
18700 if (last_used_bit
!= 0)
18701 padding_bits_to_clear
[regno
]
18702 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
18704 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
18708 not_to_clear_mask
= 0;
18709 /* We are not dealing with structs nor unions. So these arguments may be
18710 passed in floating point registers too. In some cases a BLKmode is
18711 used when returning or passing arguments in multiple VFP registers. */
18712 if (GET_MODE (arg_rtx
) == BLKmode
)
18717 /* This should really only occur when dealing with the hard-float
18719 gcc_assert (TARGET_HARD_FLOAT_ABI
);
18721 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
18723 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
18724 gcc_assert (REG_P (reg
));
18726 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
18728 /* If we are dealing with DF mode, make sure we don't
18729 clear either of the registers it addresses. */
18730 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
18733 unsigned HOST_WIDE_INT mask
;
18734 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
18735 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
18736 not_to_clear_mask
|= mask
;
18742 /* Otherwise we can rely on the MODE to determine how many registers
18743 are being used by this argument. */
18744 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
18745 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18748 unsigned HOST_WIDE_INT
18749 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
18750 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18751 not_to_clear_mask
|= mask
;
18756 return not_to_clear_mask
;
18759 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
18760 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18761 are to be fully cleared, using the value in register CLEARING_REG if more
18762 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18763 the bits that needs to be cleared in caller-saved core registers, with
18764 SCRATCH_REG used as a scratch register for that clearing.
18766 NOTE: one of three following assertions must hold:
18767 - SCRATCH_REG is a low register
18768 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18769 in TO_CLEAR_BITMAP)
18770 - CLEARING_REG is a low register. */
18773 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
18774 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
18776 bool saved_clearing
= false;
18777 rtx saved_clearing_reg
= NULL_RTX
;
18778 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
18780 gcc_assert (arm_arch_cmse
);
18782 if (!bitmap_empty_p (to_clear_bitmap
))
18784 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
18785 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
18787 clearing_regno
= REGNO (clearing_reg
);
18789 /* Clear padding bits. */
18790 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
18791 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
18794 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
18796 if (padding_bits_to_clear
[i
] == 0)
18799 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18800 CLEARING_REG as scratch. */
18802 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
18804 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18805 such that we can use clearing_reg to clear the unused bits in the
18807 if ((clearing_regno
> maxregno
18808 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18809 && !saved_clearing
)
18811 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
18812 emit_move_insn (scratch_reg
, clearing_reg
);
18813 saved_clearing
= true;
18814 saved_clearing_reg
= scratch_reg
;
18816 scratch_reg
= clearing_reg
;
18819 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18820 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
18821 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
18823 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18824 mask
= (~padding_bits_to_clear
[i
]) >> 16;
18825 rtx16
= gen_int_mode (16, SImode
);
18826 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
18828 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
18830 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
18832 if (saved_clearing
)
18833 emit_move_insn (clearing_reg
, saved_clearing_reg
);
18836 /* Clear full registers. */
18838 if (TARGET_HAVE_FPCXT_CMSE
)
18841 int i
, j
, k
, nb_regs
;
18842 rtx use_seq
, par
, reg
, set
, vunspec
;
18843 int to_clear_bitmap_size
= SBITMAP_SIZE (to_clear_bitmap
);
18844 auto_sbitmap
core_regs_bitmap (to_clear_bitmap_size
);
18845 auto_sbitmap
to_clear_core_bitmap (to_clear_bitmap_size
);
18847 for (i
= FIRST_VFP_REGNUM
; i
<= maxregno
; i
+= nb_regs
)
18849 /* Find next register to clear and exit if none. */
18850 for (; i
<= maxregno
&& !bitmap_bit_p (to_clear_bitmap
, i
); i
++);
18854 /* Compute number of consecutive registers to clear. */
18855 for (j
= i
; j
<= maxregno
&& bitmap_bit_p (to_clear_bitmap
, j
);
18859 /* Create VSCCLRM RTX pattern. */
18860 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 1));
18861 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18862 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18863 VUNSPEC_VSCCLRM_VPR
);
18864 XVECEXP (par
, 0, 0) = vunspec
;
18866 /* Insert VFP register clearing RTX in the pattern. */
18868 for (k
= 1, j
= i
; j
<= maxregno
&& k
< nb_regs
+ 1; j
++)
18870 if (!bitmap_bit_p (to_clear_bitmap
, j
))
18873 reg
= gen_rtx_REG (SFmode
, j
);
18874 set
= gen_rtx_SET (reg
, const0_rtx
);
18875 XVECEXP (par
, 0, k
++) = set
;
18878 use_seq
= get_insns ();
18881 emit_insn_after (use_seq
, emit_insn (par
));
18884 /* Get set of core registers to clear. */
18885 bitmap_clear (core_regs_bitmap
);
18886 bitmap_set_range (core_regs_bitmap
, R0_REGNUM
,
18887 IP_REGNUM
- R0_REGNUM
+ 1);
18888 bitmap_and (to_clear_core_bitmap
, to_clear_bitmap
,
18890 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap
));
18892 if (bitmap_empty_p (to_clear_core_bitmap
))
18895 /* Create clrm RTX pattern. */
18896 nb_regs
= bitmap_count_bits (to_clear_core_bitmap
);
18897 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 2));
18899 /* Insert core register clearing RTX in the pattern. */
18901 for (j
= 0, i
= minregno
; j
< nb_regs
; i
++)
18903 if (!bitmap_bit_p (to_clear_core_bitmap
, i
))
18906 reg
= gen_rtx_REG (SImode
, i
);
18907 set
= gen_rtx_SET (reg
, const0_rtx
);
18908 XVECEXP (par
, 0, j
++) = set
;
18912 /* Insert APSR register clearing RTX in the pattern
18913 * along with clobbering CC. */
18914 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18915 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18916 VUNSPEC_CLRM_APSR
);
18918 XVECEXP (par
, 0, j
++) = vunspec
;
18920 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18921 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18922 XVECEXP (par
, 0, j
) = clobber
;
18924 use_seq
= get_insns ();
18927 emit_insn_after (use_seq
, emit_insn (par
));
18931 /* If not marked for clearing, clearing_reg already does not contain
18933 if (clearing_regno
<= maxregno
18934 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18936 emit_move_insn (clearing_reg
, const0_rtx
);
18937 emit_use (clearing_reg
);
18938 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
18941 for (regno
= minregno
; regno
<= maxregno
; regno
++)
18943 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
18946 if (IS_VFP_REGNUM (regno
))
18948 /* If regno is an even vfp register and its successor is also to
18949 be cleared, use vmov. */
18950 if (TARGET_VFP_DOUBLE
18951 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
18952 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
18954 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
18955 CONST1_RTX (DFmode
));
18956 emit_use (gen_rtx_REG (DFmode
, regno
));
18961 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
18962 CONST1_RTX (SFmode
));
18963 emit_use (gen_rtx_REG (SFmode
, regno
));
18968 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
18969 emit_use (gen_rtx_REG (SImode
, regno
));
18975 /* Clear core and caller-saved VFP registers not used to pass arguments before
18976 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18977 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18978 libgcc/config/arm/cmse_nonsecure_call.S. */
18981 cmse_nonsecure_call_inline_register_clear (void)
18985 FOR_EACH_BB_FN (bb
, cfun
)
18989 FOR_BB_INSNS (bb
, insn
)
18991 bool clear_callee_saved
= TARGET_HAVE_FPCXT_CMSE
;
18992 /* frame = VFP regs + FPSCR + VPR. */
18993 unsigned lazy_store_stack_frame_size
18994 = (LAST_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1 + 2) * UNITS_PER_WORD
;
18995 unsigned long callee_saved_mask
18996 = ((1 << (LAST_HI_REGNUM
+ 1)) - 1)
18997 & ~((1 << (LAST_ARG_REGNUM
+ 1)) - 1);
18998 unsigned address_regnum
, regno
;
18999 unsigned max_int_regno
19000 = clear_callee_saved
? IP_REGNUM
: LAST_ARG_REGNUM
;
19001 unsigned max_fp_regno
19002 = TARGET_HAVE_FPCXT_CMSE
? LAST_VFP_REGNUM
: D7_VFP_REGNUM
;
19004 = TARGET_HARD_FLOAT_ABI
? max_fp_regno
: max_int_regno
;
19005 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
19007 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
19009 CUMULATIVE_ARGS args_so_far_v
;
19010 cumulative_args_t args_so_far
;
19011 tree arg_type
, fntype
;
19012 bool first_param
= true, lazy_fpclear
= !TARGET_HARD_FLOAT_ABI
;
19013 function_args_iterator args_iter
;
19014 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
19016 if (!NONDEBUG_INSN_P (insn
))
19019 if (!CALL_P (insn
))
19022 pat
= PATTERN (insn
);
19023 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
19024 call
= XVECEXP (pat
, 0, 0);
19026 /* Get the real call RTX if the insn sets a value, ie. returns. */
19027 if (GET_CODE (call
) == SET
)
19028 call
= SET_SRC (call
);
19030 /* Check if it is a cmse_nonsecure_call. */
19031 unspec
= XEXP (call
, 0);
19032 if (GET_CODE (unspec
) != UNSPEC
19033 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
19036 /* Mark registers that needs to be cleared. Those that holds a
19037 parameter are removed from the set further below. */
19038 bitmap_clear (to_clear_bitmap
);
19039 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
,
19040 max_int_regno
- R0_REGNUM
+ 1);
19042 /* Only look at the caller-saved floating point registers in case of
19043 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19044 lazy store and loads which clear both caller- and callee-saved
19048 auto_sbitmap
float_bitmap (maxregno
+ 1);
19050 bitmap_clear (float_bitmap
);
19051 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
19052 max_fp_regno
- FIRST_VFP_REGNUM
+ 1);
19053 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
19056 /* Make sure the register used to hold the function address is not
19058 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
19059 gcc_assert (MEM_P (address
));
19060 gcc_assert (REG_P (XEXP (address
, 0)));
19061 address_regnum
= REGNO (XEXP (address
, 0));
19062 if (address_regnum
<= max_int_regno
)
19063 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
19065 /* Set basic block of call insn so that df rescan is performed on
19066 insns inserted here. */
19067 set_block_for_insn (insn
, bb
);
19068 df_set_flags (DF_DEFER_INSN_RESCAN
);
19071 /* Make sure the scheduler doesn't schedule other insns beyond
19073 emit_insn (gen_blockage ());
19075 /* Walk through all arguments and clear registers appropriately.
19077 fntype
= TREE_TYPE (MEM_EXPR (address
));
19078 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
19080 args_so_far
= pack_cumulative_args (&args_so_far_v
);
19081 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
19084 uint64_t to_clear_args_mask
;
19086 if (VOID_TYPE_P (arg_type
))
19089 function_arg_info
arg (arg_type
, /*named=*/true);
19091 /* ??? We should advance after processing the argument and pass
19092 the argument we're advancing past. */
19093 arm_function_arg_advance (args_so_far
, arg
);
19095 arg_rtx
= arm_function_arg (args_so_far
, arg
);
19096 gcc_assert (REG_P (arg_rtx
));
19098 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
19100 &padding_bits_to_clear
[0]);
19101 if (to_clear_args_mask
)
19103 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
19105 if (to_clear_args_mask
& (1ULL << regno
))
19106 bitmap_clear_bit (to_clear_bitmap
, regno
);
19110 first_param
= false;
19113 /* We use right shift and left shift to clear the LSB of the address
19114 we jump to instead of using bic, to avoid having to use an extra
19115 register on Thumb-1. */
19116 clearing_reg
= XEXP (address
, 0);
19117 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
19118 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19119 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
19120 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
19122 if (clear_callee_saved
)
19125 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
19126 /* Disable frame debug info in push because it needs to be
19127 disabled for pop (see below). */
19128 RTX_FRAME_RELATED_P (push_insn
) = 0;
19130 /* Lazy store multiple. */
19134 rtx_insn
*add_insn
;
19136 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
19137 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19138 stack_pointer_rtx
, imm
));
19139 /* If we have the frame pointer, then it will be the
19140 CFA reg. Otherwise, the stack pointer is the CFA
19141 reg, so we need to emit a CFA adjust. */
19142 if (!frame_pointer_needed
)
19143 arm_add_cfa_adjust_cfa_note (add_insn
,
19144 - lazy_store_stack_frame_size
,
19146 stack_pointer_rtx
);
19147 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
19149 /* Save VFP callee-saved registers. */
19152 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
19153 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
19154 /* Disable frame debug info in push because it needs to be
19155 disabled for vpop (see below). */
19156 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19160 /* Clear caller-saved registers that leak before doing a non-secure
19162 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
19163 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
19164 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
19166 seq
= get_insns ();
19168 emit_insn_before (seq
, insn
);
19170 if (TARGET_HAVE_FPCXT_CMSE
)
19172 rtx_insn
*last
, *pop_insn
, *after
= insn
;
19176 /* Lazy load multiple done as part of libcall in Armv8-M. */
19179 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
19180 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
19181 rtx_insn
*add_insn
=
19182 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19183 stack_pointer_rtx
, imm
));
19184 if (!frame_pointer_needed
)
19185 arm_add_cfa_adjust_cfa_note (add_insn
,
19186 lazy_store_stack_frame_size
,
19188 stack_pointer_rtx
);
19190 /* Restore VFP callee-saved registers. */
19193 int nb_callee_saved_vfp_regs
=
19194 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
19195 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
19196 nb_callee_saved_vfp_regs
,
19197 stack_pointer_rtx
);
19198 /* Disable frame debug info in vpop because the SP adjustment
19199 is made using a CFA adjustment note while CFA used is
19200 sometimes R7. This then causes an assert failure in the
19201 CFI note creation code. */
19202 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19205 arm_emit_multi_reg_pop (callee_saved_mask
);
19206 pop_insn
= get_last_insn ();
19208 /* Disable frame debug info in pop because they reset the state
19209 of popped registers to what it was at the beginning of the
19210 function, before the prologue. This leads to incorrect state
19211 when doing the pop after the nonsecure call for registers that
19212 are pushed both in prologue and before the nonsecure call.
19214 It also occasionally triggers an assert failure in CFI note
19215 creation code when there are two codepaths to the epilogue,
19216 one of which does not go through the nonsecure call.
19217 Obviously this mean that debugging between the push and pop is
19219 RTX_FRAME_RELATED_P (pop_insn
) = 0;
19221 seq
= get_insns ();
19222 last
= get_last_insn ();
19225 emit_insn_after (seq
, after
);
19227 /* Skip pop we have just inserted after nonsecure call, we know
19228 it does not contain a nonsecure call. */
19235 /* Rewrite move insn into subtract of 0 if the condition codes will
19236 be useful in next conditional jump insn. */
19239 thumb1_reorg (void)
19243 FOR_EACH_BB_FN (bb
, cfun
)
19246 rtx cmp
, op0
, op1
, set
= NULL
;
19247 rtx_insn
*prev
, *insn
= BB_END (bb
);
19248 bool insn_clobbered
= false;
19250 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
19251 insn
= PREV_INSN (insn
);
19253 /* Find the last cbranchsi4_insn in basic block BB. */
19254 if (insn
== BB_HEAD (bb
)
19255 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19258 /* Get the register with which we are comparing. */
19259 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
19260 op0
= XEXP (cmp
, 0);
19261 op1
= XEXP (cmp
, 1);
19263 /* Check that comparison is against ZERO. */
19264 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
19267 /* Find the first flag setting insn before INSN in basic block BB. */
19268 gcc_assert (insn
!= BB_HEAD (bb
));
19269 for (prev
= PREV_INSN (insn
);
19271 && prev
!= BB_HEAD (bb
)
19273 || DEBUG_INSN_P (prev
)
19274 || ((set
= single_set (prev
)) != NULL
19275 && get_attr_conds (prev
) == CONDS_NOCOND
)));
19276 prev
= PREV_INSN (prev
))
19278 if (reg_set_p (op0
, prev
))
19279 insn_clobbered
= true;
19282 /* Skip if op0 is clobbered by insn other than prev. */
19283 if (insn_clobbered
)
19289 dest
= SET_DEST (set
);
19290 src
= SET_SRC (set
);
19291 if (!low_register_operand (dest
, SImode
)
19292 || !low_register_operand (src
, SImode
))
19295 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19296 in INSN. Both src and dest of the move insn are checked. */
19297 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
19299 dest
= copy_rtx (dest
);
19300 src
= copy_rtx (src
);
19301 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
19302 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
19303 INSN_CODE (prev
) = -1;
19304 /* Set test register in INSN to dest. */
19305 XEXP (cmp
, 0) = copy_rtx (dest
);
19306 INSN_CODE (insn
) = -1;
19311 /* Convert instructions to their cc-clobbering variant if possible, since
19312 that allows us to use smaller encodings. */
19315 thumb2_reorg (void)
19320 INIT_REG_SET (&live
);
19322 /* We are freeing block_for_insn in the toplev to keep compatibility
19323 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19324 compute_bb_for_insn ();
19327 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
19329 FOR_EACH_BB_FN (bb
, cfun
)
19331 if ((current_tune
->disparage_flag_setting_t16_encodings
19332 == tune_params::DISPARAGE_FLAGS_ALL
)
19333 && optimize_bb_for_speed_p (bb
))
19337 Convert_Action action
= SKIP
;
19338 Convert_Action action_for_partial_flag_setting
19339 = ((current_tune
->disparage_flag_setting_t16_encodings
19340 != tune_params::DISPARAGE_FLAGS_NEITHER
)
19341 && optimize_bb_for_speed_p (bb
))
19344 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
19345 df_simulate_initialize_backwards (bb
, &live
);
19346 FOR_BB_INSNS_REVERSE (bb
, insn
)
19348 if (NONJUMP_INSN_P (insn
)
19349 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
19350 && GET_CODE (PATTERN (insn
)) == SET
)
19353 rtx pat
= PATTERN (insn
);
19354 rtx dst
= XEXP (pat
, 0);
19355 rtx src
= XEXP (pat
, 1);
19356 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
19358 if (UNARY_P (src
) || BINARY_P (src
))
19359 op0
= XEXP (src
, 0);
19361 if (BINARY_P (src
))
19362 op1
= XEXP (src
, 1);
19364 if (low_register_operand (dst
, SImode
))
19366 switch (GET_CODE (src
))
19369 /* Adding two registers and storing the result
19370 in the first source is already a 16-bit
19372 if (rtx_equal_p (dst
, op0
)
19373 && register_operand (op1
, SImode
))
19376 if (low_register_operand (op0
, SImode
))
19378 /* ADDS <Rd>,<Rn>,<Rm> */
19379 if (low_register_operand (op1
, SImode
))
19381 /* ADDS <Rdn>,#<imm8> */
19382 /* SUBS <Rdn>,#<imm8> */
19383 else if (rtx_equal_p (dst
, op0
)
19384 && CONST_INT_P (op1
)
19385 && IN_RANGE (INTVAL (op1
), -255, 255))
19387 /* ADDS <Rd>,<Rn>,#<imm3> */
19388 /* SUBS <Rd>,<Rn>,#<imm3> */
19389 else if (CONST_INT_P (op1
)
19390 && IN_RANGE (INTVAL (op1
), -7, 7))
19393 /* ADCS <Rd>, <Rn> */
19394 else if (GET_CODE (XEXP (src
, 0)) == PLUS
19395 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
19396 && low_register_operand (XEXP (XEXP (src
, 0), 1),
19398 && COMPARISON_P (op1
)
19399 && cc_register (XEXP (op1
, 0), VOIDmode
)
19400 && maybe_get_arm_condition_code (op1
) == ARM_CS
19401 && XEXP (op1
, 1) == const0_rtx
)
19406 /* RSBS <Rd>,<Rn>,#0
19407 Not handled here: see NEG below. */
19408 /* SUBS <Rd>,<Rn>,#<imm3>
19410 Not handled here: see PLUS above. */
19411 /* SUBS <Rd>,<Rn>,<Rm> */
19412 if (low_register_operand (op0
, SImode
)
19413 && low_register_operand (op1
, SImode
))
19418 /* MULS <Rdm>,<Rn>,<Rdm>
19419 As an exception to the rule, this is only used
19420 when optimizing for size since MULS is slow on all
19421 known implementations. We do not even want to use
19422 MULS in cold code, if optimizing for speed, so we
19423 test the global flag here. */
19424 if (!optimize_size
)
19426 /* Fall through. */
19430 /* ANDS <Rdn>,<Rm> */
19431 if (rtx_equal_p (dst
, op0
)
19432 && low_register_operand (op1
, SImode
))
19433 action
= action_for_partial_flag_setting
;
19434 else if (rtx_equal_p (dst
, op1
)
19435 && low_register_operand (op0
, SImode
))
19436 action
= action_for_partial_flag_setting
== SKIP
19437 ? SKIP
: SWAP_CONV
;
19443 /* ASRS <Rdn>,<Rm> */
19444 /* LSRS <Rdn>,<Rm> */
19445 /* LSLS <Rdn>,<Rm> */
19446 if (rtx_equal_p (dst
, op0
)
19447 && low_register_operand (op1
, SImode
))
19448 action
= action_for_partial_flag_setting
;
19449 /* ASRS <Rd>,<Rm>,#<imm5> */
19450 /* LSRS <Rd>,<Rm>,#<imm5> */
19451 /* LSLS <Rd>,<Rm>,#<imm5> */
19452 else if (low_register_operand (op0
, SImode
)
19453 && CONST_INT_P (op1
)
19454 && IN_RANGE (INTVAL (op1
), 0, 31))
19455 action
= action_for_partial_flag_setting
;
19459 /* RORS <Rdn>,<Rm> */
19460 if (rtx_equal_p (dst
, op0
)
19461 && low_register_operand (op1
, SImode
))
19462 action
= action_for_partial_flag_setting
;
19466 /* MVNS <Rd>,<Rm> */
19467 if (low_register_operand (op0
, SImode
))
19468 action
= action_for_partial_flag_setting
;
19472 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19473 if (low_register_operand (op0
, SImode
))
19478 /* MOVS <Rd>,#<imm8> */
19479 if (CONST_INT_P (src
)
19480 && IN_RANGE (INTVAL (src
), 0, 255))
19481 action
= action_for_partial_flag_setting
;
19485 /* MOVS and MOV<c> with registers have different
19486 encodings, so are not relevant here. */
19494 if (action
!= SKIP
)
19496 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
19497 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
19500 if (action
== SWAP_CONV
)
19502 src
= copy_rtx (src
);
19503 XEXP (src
, 0) = op1
;
19504 XEXP (src
, 1) = op0
;
19505 pat
= gen_rtx_SET (dst
, src
);
19506 vec
= gen_rtvec (2, pat
, clobber
);
19508 else /* action == CONV */
19509 vec
= gen_rtvec (2, pat
, clobber
);
19511 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
19512 INSN_CODE (insn
) = -1;
19516 if (NONDEBUG_INSN_P (insn
))
19517 df_simulate_one_insn_backwards (bb
, insn
, &live
);
19521 CLEAR_REG_SET (&live
);
19524 /* Gcc puts the pool in the wrong place for ARM, since we can only
19525 load addresses a limited distance around the pc. We do some
19526 special munging to move the constant pool values to the correct
19527 point in the code. */
19532 HOST_WIDE_INT address
= 0;
19536 cmse_nonsecure_call_inline_register_clear ();
19538 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19539 if (cfun
->is_thunk
)
19541 else if (TARGET_THUMB1
)
19543 else if (TARGET_THUMB2
)
19546 /* Ensure all insns that must be split have been split at this point.
19547 Otherwise, the pool placement code below may compute incorrect
19548 insn lengths. Note that when optimizing, all insns have already
19549 been split at this point. */
19551 split_all_insns_noflow ();
19553 /* Make sure we do not attempt to create a literal pool even though it should
19554 no longer be necessary to create any. */
19555 if (arm_disable_literal_pool
)
19558 minipool_fix_head
= minipool_fix_tail
= NULL
;
19560 /* The first insn must always be a note, or the code below won't
19561 scan it properly. */
19562 insn
= get_insns ();
19563 gcc_assert (NOTE_P (insn
));
19566 /* Scan all the insns and record the operands that will need fixing. */
19567 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
19569 if (BARRIER_P (insn
))
19570 push_minipool_barrier (insn
, address
);
19571 else if (INSN_P (insn
))
19573 rtx_jump_table_data
*table
;
19575 note_invalid_constants (insn
, address
, true);
19576 address
+= get_attr_length (insn
);
19578 /* If the insn is a vector jump, add the size of the table
19579 and skip the table. */
19580 if (tablejump_p (insn
, NULL
, &table
))
19582 address
+= get_jump_table_size (table
);
19586 else if (LABEL_P (insn
))
19587 /* Add the worst-case padding due to alignment. We don't add
19588 the _current_ padding because the minipool insertions
19589 themselves might change it. */
19590 address
+= get_label_padding (insn
);
19593 fix
= minipool_fix_head
;
19595 /* Now scan the fixups and perform the required changes. */
19600 Mfix
* last_added_fix
;
19601 Mfix
* last_barrier
= NULL
;
19604 /* Skip any further barriers before the next fix. */
19605 while (fix
&& BARRIER_P (fix
->insn
))
19608 /* No more fixes. */
19612 last_added_fix
= NULL
;
19614 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
19616 if (BARRIER_P (ftmp
->insn
))
19618 if (ftmp
->address
>= minipool_vector_head
->max_address
)
19621 last_barrier
= ftmp
;
19623 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
19626 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
19629 /* If we found a barrier, drop back to that; any fixes that we
19630 could have reached but come after the barrier will now go in
19631 the next mini-pool. */
19632 if (last_barrier
!= NULL
)
19634 /* Reduce the refcount for those fixes that won't go into this
19636 for (fdel
= last_barrier
->next
;
19637 fdel
&& fdel
!= ftmp
;
19640 fdel
->minipool
->refcount
--;
19641 fdel
->minipool
= NULL
;
19644 ftmp
= last_barrier
;
19648 /* ftmp is first fix that we can't fit into this pool and
19649 there no natural barriers that we could use. Insert a
19650 new barrier in the code somewhere between the previous
19651 fix and this one, and arrange to jump around it. */
19652 HOST_WIDE_INT max_address
;
19654 /* The last item on the list of fixes must be a barrier, so
19655 we can never run off the end of the list of fixes without
19656 last_barrier being set. */
19659 max_address
= minipool_vector_head
->max_address
;
19660 /* Check that there isn't another fix that is in range that
19661 we couldn't fit into this pool because the pool was
19662 already too large: we need to put the pool before such an
19663 instruction. The pool itself may come just after the
19664 fix because create_fix_barrier also allows space for a
19665 jump instruction. */
19666 if (ftmp
->address
< max_address
)
19667 max_address
= ftmp
->address
+ 1;
19669 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
19672 assign_minipool_offsets (last_barrier
);
19676 if (!BARRIER_P (ftmp
->insn
)
19677 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
19684 /* Scan over the fixes we have identified for this pool, fixing them
19685 up and adding the constants to the pool itself. */
19686 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
19687 this_fix
= this_fix
->next
)
19688 if (!BARRIER_P (this_fix
->insn
))
19691 = plus_constant (Pmode
,
19692 gen_rtx_LABEL_REF (VOIDmode
,
19693 minipool_vector_label
),
19694 this_fix
->minipool
->offset
);
19695 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
19698 dump_minipool (last_barrier
->insn
);
19702 /* From now on we must synthesize any constants that we can't handle
19703 directly. This can happen if the RTL gets split during final
19704 instruction generation. */
19705 cfun
->machine
->after_arm_reorg
= 1;
19707 /* Free the minipool memory. */
19708 obstack_free (&minipool_obstack
, minipool_startobj
);
19711 /* Routines to output assembly language. */
19713 /* Return string representation of passed in real value. */
19714 static const char *
19715 fp_const_from_val (REAL_VALUE_TYPE
*r
)
19717 if (!fp_consts_inited
)
19720 gcc_assert (real_equal (r
, &value_fp0
));
19724 /* OPERANDS[0] is the entire list of insns that constitute pop,
19725 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19726 is in the list, UPDATE is true iff the list contains explicit
19727 update of base register. */
19729 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
19735 const char *conditional
;
19736 int num_saves
= XVECLEN (operands
[0], 0);
19737 unsigned int regno
;
19738 unsigned int regno_base
= REGNO (operands
[1]);
19739 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
19742 offset
+= update
? 1 : 0;
19743 offset
+= return_pc
? 1 : 0;
19745 /* Is the base register in the list? */
19746 for (i
= offset
; i
< num_saves
; i
++)
19748 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
19749 /* If SP is in the list, then the base register must be SP. */
19750 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
19751 /* If base register is in the list, there must be no explicit update. */
19752 if (regno
== regno_base
)
19753 gcc_assert (!update
);
19756 conditional
= reverse
? "%?%D0" : "%?%d0";
19757 /* Can't use POP if returning from an interrupt. */
19758 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
19759 sprintf (pattern
, "pop%s\t{", conditional
);
19762 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19763 It's just a convention, their semantics are identical. */
19764 if (regno_base
== SP_REGNUM
)
19765 sprintf (pattern
, "ldmfd%s\t", conditional
);
19767 sprintf (pattern
, "ldmia%s\t", conditional
);
19769 sprintf (pattern
, "ldm%s\t", conditional
);
19771 strcat (pattern
, reg_names
[regno_base
]);
19773 strcat (pattern
, "!, {");
19775 strcat (pattern
, ", {");
19778 /* Output the first destination register. */
19780 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
19782 /* Output the rest of the destination registers. */
19783 for (i
= offset
+ 1; i
< num_saves
; i
++)
19785 strcat (pattern
, ", ");
19787 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
19790 strcat (pattern
, "}");
19792 if (interrupt_p
&& return_pc
)
19793 strcat (pattern
, "^");
19795 output_asm_insn (pattern
, &cond
);
19799 /* Output the assembly for a store multiple. */
19802 vfp_output_vstmd (rtx
* operands
)
19808 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
19809 ? XEXP (operands
[0], 0)
19810 : XEXP (XEXP (operands
[0], 0), 0);
19811 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
19814 strcpy (pattern
, "vpush%?.64\t{%P1");
19816 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
19818 p
= strlen (pattern
);
19820 gcc_assert (REG_P (operands
[1]));
19822 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
19823 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
19825 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
19827 strcpy (&pattern
[p
], "}");
19829 output_asm_insn (pattern
, operands
);
19834 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19835 number of bytes pushed. */
19838 vfp_emit_fstmd (int base_reg
, int count
)
19845 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19846 register pairs are stored by a store multiple insn. We avoid this
19847 by pushing an extra pair. */
19848 if (count
== 2 && !arm_arch6
)
19850 if (base_reg
== LAST_VFP_REGNUM
- 3)
19855 /* FSTMD may not store more than 16 doubleword registers at once. Split
19856 larger stores into multiple parts (up to a maximum of two, in
19861 /* NOTE: base_reg is an internal register number, so each D register
19863 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
19864 saved
+= vfp_emit_fstmd (base_reg
, 16);
19868 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
19869 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
19871 reg
= gen_rtx_REG (DFmode
, base_reg
);
19874 XVECEXP (par
, 0, 0)
19875 = gen_rtx_SET (gen_frame_mem
19877 gen_rtx_PRE_MODIFY (Pmode
,
19880 (Pmode
, stack_pointer_rtx
,
19883 gen_rtx_UNSPEC (BLKmode
,
19884 gen_rtvec (1, reg
),
19885 UNSPEC_PUSH_MULT
));
19887 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19888 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
19889 RTX_FRAME_RELATED_P (tmp
) = 1;
19890 XVECEXP (dwarf
, 0, 0) = tmp
;
19892 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
19893 RTX_FRAME_RELATED_P (tmp
) = 1;
19894 XVECEXP (dwarf
, 0, 1) = tmp
;
19896 for (i
= 1; i
< count
; i
++)
19898 reg
= gen_rtx_REG (DFmode
, base_reg
);
19900 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
19902 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
19903 plus_constant (Pmode
,
19907 RTX_FRAME_RELATED_P (tmp
) = 1;
19908 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
19911 par
= emit_insn (par
);
19912 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19913 RTX_FRAME_RELATED_P (par
) = 1;
19918 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19919 has the cmse_nonsecure_call attribute and returns false otherwise. */
19922 detect_cmse_nonsecure_call (tree addr
)
19927 tree fntype
= TREE_TYPE (addr
);
19928 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
19929 TYPE_ATTRIBUTES (fntype
)))
19935 /* Emit a call instruction with pattern PAT. ADDR is the address of
19936 the call target. */
19939 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
19943 insn
= emit_call_insn (pat
);
19945 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19946 If the call might use such an entry, add a use of the PIC register
19947 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19948 if (TARGET_VXWORKS_RTP
19951 && SYMBOL_REF_P (addr
)
19952 && (SYMBOL_REF_DECL (addr
)
19953 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
19954 : !SYMBOL_REF_LOCAL_P (addr
)))
19956 require_pic_register (NULL_RTX
, false /*compute_now*/);
19957 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
19962 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
19963 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), fdpic_reg
);
19966 if (TARGET_AAPCS_BASED
)
19968 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19969 linker. We need to add an IP clobber to allow setting
19970 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19971 is not needed since it's a fixed register. */
19972 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
19973 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
19977 /* Output a 'call' insn. */
19979 output_call (rtx
*operands
)
19981 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
19983 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19984 if (REGNO (operands
[0]) == LR_REGNUM
)
19986 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
19987 output_asm_insn ("mov%?\t%0, %|lr", operands
);
19990 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
19992 if (TARGET_INTERWORK
|| arm_arch4t
)
19993 output_asm_insn ("bx%?\t%0", operands
);
19995 output_asm_insn ("mov%?\t%|pc, %0", operands
);
20000 /* Output a move from arm registers to arm registers of a long double
20001 OPERANDS[0] is the destination.
20002 OPERANDS[1] is the source. */
20004 output_mov_long_double_arm_from_arm (rtx
*operands
)
20006 /* We have to be careful here because the two might overlap. */
20007 int dest_start
= REGNO (operands
[0]);
20008 int src_start
= REGNO (operands
[1]);
20012 if (dest_start
< src_start
)
20014 for (i
= 0; i
< 3; i
++)
20016 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20017 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20018 output_asm_insn ("mov%?\t%0, %1", ops
);
20023 for (i
= 2; i
>= 0; i
--)
20025 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
20026 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
20027 output_asm_insn ("mov%?\t%0, %1", ops
);
20035 arm_emit_movpair (rtx dest
, rtx src
)
20037 /* If the src is an immediate, simplify it. */
20038 if (CONST_INT_P (src
))
20040 HOST_WIDE_INT val
= INTVAL (src
);
20041 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
20042 if ((val
>> 16) & 0x0000ffff)
20044 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
20046 GEN_INT ((val
>> 16) & 0x0000ffff));
20047 rtx_insn
*insn
= get_last_insn ();
20048 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20052 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
20053 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
20054 rtx_insn
*insn
= get_last_insn ();
20055 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
20058 /* Output a move between double words. It must be REG<-MEM
20061 output_move_double (rtx
*operands
, bool emit
, int *count
)
20063 enum rtx_code code0
= GET_CODE (operands
[0]);
20064 enum rtx_code code1
= GET_CODE (operands
[1]);
20069 /* The only case when this might happen is when
20070 you are looking at the length of a DImode instruction
20071 that has an invalid constant in it. */
20072 if (code0
== REG
&& code1
!= MEM
)
20074 gcc_assert (!emit
);
20081 unsigned int reg0
= REGNO (operands
[0]);
20082 const bool can_ldrd
= TARGET_LDRD
&& (TARGET_THUMB2
|| (reg0
% 2 == 0));
20084 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
20086 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
20088 switch (GET_CODE (XEXP (operands
[1], 0)))
20095 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
20096 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
20098 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20103 gcc_assert (can_ldrd
);
20105 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
20112 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
20114 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
20122 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
20124 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
20129 gcc_assert (can_ldrd
);
20131 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
20136 /* Autoincrement addressing modes should never have overlapping
20137 base and destination registers, and overlapping index registers
20138 are already prohibited, so this doesn't need to worry about
20140 otherops
[0] = operands
[0];
20141 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
20142 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
20144 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
20146 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
20148 /* Registers overlap so split out the increment. */
20151 gcc_assert (can_ldrd
);
20152 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
20153 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
20160 /* Use a single insn if we can.
20161 FIXME: IWMMXT allows offsets larger than ldrd can
20162 handle, fix these up with a pair of ldr. */
20165 || !CONST_INT_P (otherops
[2])
20166 || (INTVAL (otherops
[2]) > -256
20167 && INTVAL (otherops
[2]) < 256)))
20170 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
20176 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
20177 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20187 /* Use a single insn if we can.
20188 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20189 fix these up with a pair of ldr. */
20192 || !CONST_INT_P (otherops
[2])
20193 || (INTVAL (otherops
[2]) > -256
20194 && INTVAL (otherops
[2]) < 256)))
20197 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
20203 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20204 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
20214 /* We might be able to use ldrd %0, %1 here. However the range is
20215 different to ldr/adr, and it is broken on some ARMv7-M
20216 implementations. */
20217 /* Use the second register of the pair to avoid problematic
20219 otherops
[1] = operands
[1];
20221 output_asm_insn ("adr%?\t%0, %1", otherops
);
20222 operands
[1] = otherops
[0];
20226 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20228 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
20235 /* ??? This needs checking for thumb2. */
20237 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
20238 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
20240 otherops
[0] = operands
[0];
20241 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
20242 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
20244 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
20246 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20248 switch ((int) INTVAL (otherops
[2]))
20252 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
20258 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
20264 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
20268 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
20269 operands
[1] = otherops
[0];
20271 && (REG_P (otherops
[2])
20273 || (CONST_INT_P (otherops
[2])
20274 && INTVAL (otherops
[2]) > -256
20275 && INTVAL (otherops
[2]) < 256)))
20277 if (reg_overlap_mentioned_p (operands
[0],
20280 /* Swap base and index registers over to
20281 avoid a conflict. */
20282 std::swap (otherops
[1], otherops
[2]);
20284 /* If both registers conflict, it will usually
20285 have been fixed by a splitter. */
20286 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
20287 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
20291 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20292 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20299 otherops
[0] = operands
[0];
20301 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
20306 if (CONST_INT_P (otherops
[2]))
20310 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
20311 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
20313 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20319 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20325 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
20332 return "ldrd%?\t%0, [%1]";
20334 return "ldmia%?\t%1, %M0";
20338 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
20339 /* Take care of overlapping base/data reg. */
20340 if (reg_mentioned_p (operands
[0], operands
[1]))
20344 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20345 output_asm_insn ("ldr%?\t%0, %1", operands
);
20355 output_asm_insn ("ldr%?\t%0, %1", operands
);
20356 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20366 /* Constraints should ensure this. */
20367 gcc_assert (code0
== MEM
&& code1
== REG
);
20368 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
20369 || (TARGET_ARM
&& TARGET_LDRD
));
20371 /* For TARGET_ARM the first source register of an STRD
20372 must be even. This is usually the case for double-word
20373 values but user assembly constraints can force an odd
20374 starting register. */
20375 bool allow_strd
= TARGET_LDRD
20376 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
20377 switch (GET_CODE (XEXP (operands
[0], 0)))
20383 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
20385 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20390 gcc_assert (allow_strd
);
20392 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
20399 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
20401 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
20409 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
20411 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
20416 gcc_assert (allow_strd
);
20418 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
20423 otherops
[0] = operands
[1];
20424 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
20425 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
20427 /* IWMMXT allows offsets larger than strd can handle,
20428 fix these up with a pair of str. */
20430 && CONST_INT_P (otherops
[2])
20431 && (INTVAL(otherops
[2]) <= -256
20432 || INTVAL(otherops
[2]) >= 256))
20434 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20438 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
20439 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20448 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20449 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
20455 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20458 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
20463 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
20468 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
20469 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20471 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
20475 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
20482 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
20489 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
20494 && (REG_P (otherops
[2])
20496 || (CONST_INT_P (otherops
[2])
20497 && INTVAL (otherops
[2]) > -256
20498 && INTVAL (otherops
[2]) < 256)))
20500 otherops
[0] = operands
[1];
20501 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
20503 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
20509 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
20510 otherops
[1] = operands
[1];
20513 output_asm_insn ("str%?\t%1, %0", operands
);
20514 output_asm_insn ("str%?\t%H1, %0", otherops
);
20524 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20525 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20528 output_move_quad (rtx
*operands
)
20530 if (REG_P (operands
[0]))
20532 /* Load, or reg->reg move. */
20534 if (MEM_P (operands
[1]))
20536 switch (GET_CODE (XEXP (operands
[1], 0)))
20539 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20544 output_asm_insn ("adr%?\t%0, %1", operands
);
20545 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
20549 gcc_unreachable ();
20557 gcc_assert (REG_P (operands
[1]));
20559 dest
= REGNO (operands
[0]);
20560 src
= REGNO (operands
[1]);
20562 /* This seems pretty dumb, but hopefully GCC won't try to do it
20565 for (i
= 0; i
< 4; i
++)
20567 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20568 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20569 output_asm_insn ("mov%?\t%0, %1", ops
);
20572 for (i
= 3; i
>= 0; i
--)
20574 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20575 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20576 output_asm_insn ("mov%?\t%0, %1", ops
);
20582 gcc_assert (MEM_P (operands
[0]));
20583 gcc_assert (REG_P (operands
[1]));
20584 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
20586 switch (GET_CODE (XEXP (operands
[0], 0)))
20589 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20593 gcc_unreachable ();
20600 /* Output a VFP load or store instruction. */
20603 output_move_vfp (rtx
*operands
)
20605 rtx reg
, mem
, addr
, ops
[2];
20606 int load
= REG_P (operands
[0]);
20607 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
20608 int sp
= (!TARGET_VFP_FP16INST
20609 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
20610 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
20615 reg
= operands
[!load
];
20616 mem
= operands
[load
];
20618 mode
= GET_MODE (reg
);
20620 gcc_assert (REG_P (reg
));
20621 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
20622 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
20628 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
20629 gcc_assert (MEM_P (mem
));
20631 addr
= XEXP (mem
, 0);
20633 switch (GET_CODE (addr
))
20636 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20637 ops
[0] = XEXP (addr
, 0);
20642 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20643 ops
[0] = XEXP (addr
, 0);
20648 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
20654 sprintf (buff
, templ
,
20655 load
? "ld" : "st",
20656 dp
? "64" : sp
? "32" : "16",
20658 integer_p
? "\t%@ int" : "");
20659 output_asm_insn (buff
, ops
);
20664 /* Output a Neon double-word or quad-word load or store, or a load
20665 or store for larger structure modes.
20667 WARNING: The ordering of elements is weird in big-endian mode,
20668 because the EABI requires that vectors stored in memory appear
20669 as though they were stored by a VSTM, as required by the EABI.
20670 GCC RTL defines element ordering based on in-memory order.
20671 This can be different from the architectural ordering of elements
20672 within a NEON register. The intrinsics defined in arm_neon.h use the
20673 NEON register element ordering, not the GCC RTL element ordering.
20675 For example, the in-memory ordering of a big-endian quadword
20676 vector with 16-bit elements when stored from register pair {d0,d1}
20677 will be (lowest address first, d0[N] is NEON register element N):
20679 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20681 When necessary, quadword registers (dN, dN+1) are moved to ARM
20682 registers from rN in the order:
20684 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20686 So that STM/LDM can be used on vectors in ARM registers, and the
20687 same memory layout will result as if VSTM/VLDM were used.
20689 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20690 possible, which allows use of appropriate alignment tags.
20691 Note that the choice of "64" is independent of the actual vector
20692 element size; this size simply ensures that the behavior is
20693 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20695 Due to limitations of those instructions, use of VST1.64/VLD1.64
20696 is not possible if:
20697 - the address contains PRE_DEC, or
20698 - the mode refers to more than 4 double-word registers
20700 In those cases, it would be possible to replace VSTM/VLDM by a
20701 sequence of instructions; this is not currently implemented since
20702 this is not certain to actually improve performance. */
20705 output_move_neon (rtx
*operands
)
20707 rtx reg
, mem
, addr
, ops
[2];
20708 int regno
, nregs
, load
= REG_P (operands
[0]);
20713 reg
= operands
[!load
];
20714 mem
= operands
[load
];
20716 mode
= GET_MODE (reg
);
20718 gcc_assert (REG_P (reg
));
20719 regno
= REGNO (reg
);
20720 nregs
= REG_NREGS (reg
) / 2;
20721 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
20722 || NEON_REGNO_OK_FOR_QUAD (regno
));
20723 gcc_assert (VALID_NEON_DREG_MODE (mode
)
20724 || VALID_NEON_QREG_MODE (mode
)
20725 || VALID_NEON_STRUCT_MODE (mode
));
20726 gcc_assert (MEM_P (mem
));
20728 addr
= XEXP (mem
, 0);
20730 /* Strip off const from addresses like (const (plus (...))). */
20731 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20732 addr
= XEXP (addr
, 0);
20734 switch (GET_CODE (addr
))
20737 /* We have to use vldm / vstm for too-large modes. */
20738 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20740 templ
= "v%smia%%?\t%%0!, %%h1";
20741 ops
[0] = XEXP (addr
, 0);
20745 templ
= "v%s1.64\t%%h1, %%A0";
20752 /* We have to use vldm / vstm in this case, since there is no
20753 pre-decrement form of the vld1 / vst1 instructions. */
20754 templ
= "v%smdb%%?\t%%0!, %%h1";
20755 ops
[0] = XEXP (addr
, 0);
20760 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20761 gcc_unreachable ();
20764 /* We have to use vldm / vstm for too-large modes. */
20767 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20768 templ
= "v%smia%%?\t%%m0, %%h1";
20770 templ
= "v%s1.64\t%%h1, %%A0";
20776 /* Fall through. */
20778 if (GET_CODE (addr
) == PLUS
)
20779 addr
= XEXP (addr
, 0);
20780 /* Fall through. */
20785 for (i
= 0; i
< nregs
; i
++)
20787 /* We're only using DImode here because it's a convenient
20789 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
20790 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
20791 if (reg_overlap_mentioned_p (ops
[0], mem
))
20793 gcc_assert (overlap
== -1);
20798 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20799 sprintf (buff
, "v%sr.64\t%%P0, %%1", load
? "ld" : "st");
20801 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20802 output_asm_insn (buff
, ops
);
20807 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
20808 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
20809 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20810 sprintf (buff
, "v%sr.32\t%%P0, %%1", load
? "ld" : "st");
20812 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20813 output_asm_insn (buff
, ops
);
20820 gcc_unreachable ();
20823 sprintf (buff
, templ
, load
? "ld" : "st");
20824 output_asm_insn (buff
, ops
);
20829 /* Compute and return the length of neon_mov<mode>, where <mode> is
20830 one of VSTRUCT modes: EI, OI, CI or XI. */
20832 arm_attr_length_move_neon (rtx_insn
*insn
)
20834 rtx reg
, mem
, addr
;
20838 extract_insn_cached (insn
);
20840 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
20842 mode
= GET_MODE (recog_data
.operand
[0]);
20853 gcc_unreachable ();
20857 load
= REG_P (recog_data
.operand
[0]);
20858 reg
= recog_data
.operand
[!load
];
20859 mem
= recog_data
.operand
[load
];
20861 gcc_assert (MEM_P (mem
));
20863 addr
= XEXP (mem
, 0);
20865 /* Strip off const from addresses like (const (plus (...))). */
20866 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20867 addr
= XEXP (addr
, 0);
20869 if (LABEL_REF_P (addr
) || GET_CODE (addr
) == PLUS
)
20871 int insns
= REG_NREGS (reg
) / 2;
20878 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20882 arm_address_offset_is_imm (rtx_insn
*insn
)
20886 extract_insn_cached (insn
);
20888 if (REG_P (recog_data
.operand
[0]))
20891 mem
= recog_data
.operand
[0];
20893 gcc_assert (MEM_P (mem
));
20895 addr
= XEXP (mem
, 0);
20898 || (GET_CODE (addr
) == PLUS
20899 && REG_P (XEXP (addr
, 0))
20900 && CONST_INT_P (XEXP (addr
, 1))))
20906 /* Output an ADD r, s, #n where n may be too big for one instruction.
20907 If adding zero to one register, output nothing. */
20909 output_add_immediate (rtx
*operands
)
20911 HOST_WIDE_INT n
= INTVAL (operands
[2]);
20913 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
20916 output_multi_immediate (operands
,
20917 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20920 output_multi_immediate (operands
,
20921 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20928 /* Output a multiple immediate operation.
20929 OPERANDS is the vector of operands referred to in the output patterns.
20930 INSTR1 is the output pattern to use for the first constant.
20931 INSTR2 is the output pattern to use for subsequent constants.
20932 IMMED_OP is the index of the constant slot in OPERANDS.
20933 N is the constant value. */
20934 static const char *
20935 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
20936 int immed_op
, HOST_WIDE_INT n
)
20938 #if HOST_BITS_PER_WIDE_INT > 32
20944 /* Quick and easy output. */
20945 operands
[immed_op
] = const0_rtx
;
20946 output_asm_insn (instr1
, operands
);
20951 const char * instr
= instr1
;
20953 /* Note that n is never zero here (which would give no output). */
20954 for (i
= 0; i
< 32; i
+= 2)
20958 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
20959 output_asm_insn (instr
, operands
);
20969 /* Return the name of a shifter operation. */
20970 static const char *
20971 arm_shift_nmem(enum rtx_code code
)
20976 return ARM_LSL_NAME
;
20992 /* Return the appropriate ARM instruction for the operation code.
20993 The returned result should not be overwritten. OP is the rtx of the
20994 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20997 arithmetic_instr (rtx op
, int shift_first_arg
)
20999 switch (GET_CODE (op
))
21005 return shift_first_arg
? "rsb" : "sub";
21020 return arm_shift_nmem(GET_CODE(op
));
21023 gcc_unreachable ();
21027 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21028 for the operation code. The returned result should not be overwritten.
21029 OP is the rtx code of the shift.
21030 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
21032 static const char *
21033 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
21036 enum rtx_code code
= GET_CODE (op
);
21041 if (!CONST_INT_P (XEXP (op
, 1)))
21043 output_operand_lossage ("invalid shift operand");
21048 *amountp
= 32 - INTVAL (XEXP (op
, 1));
21056 mnem
= arm_shift_nmem(code
);
21057 if (CONST_INT_P (XEXP (op
, 1)))
21059 *amountp
= INTVAL (XEXP (op
, 1));
21061 else if (REG_P (XEXP (op
, 1)))
21068 output_operand_lossage ("invalid shift operand");
21074 /* We never have to worry about the amount being other than a
21075 power of 2, since this case can never be reloaded from a reg. */
21076 if (!CONST_INT_P (XEXP (op
, 1)))
21078 output_operand_lossage ("invalid shift operand");
21082 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
21084 /* Amount must be a power of two. */
21085 if (*amountp
& (*amountp
- 1))
21087 output_operand_lossage ("invalid shift operand");
21091 *amountp
= exact_log2 (*amountp
);
21092 gcc_assert (IN_RANGE (*amountp
, 0, 31));
21093 return ARM_LSL_NAME
;
21096 output_operand_lossage ("invalid shift operand");
21100 /* This is not 100% correct, but follows from the desire to merge
21101 multiplication by a power of 2 with the recognizer for a
21102 shift. >=32 is not a valid shift for "lsl", so we must try and
21103 output a shift that produces the correct arithmetical result.
21104 Using lsr #32 is identical except for the fact that the carry bit
21105 is not set correctly if we set the flags; but we never use the
21106 carry bit from such an operation, so we can ignore that. */
21107 if (code
== ROTATERT
)
21108 /* Rotate is just modulo 32. */
21110 else if (*amountp
!= (*amountp
& 31))
21112 if (code
== ASHIFT
)
21117 /* Shifts of 0 are no-ops. */
21124 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21125 because /bin/as is horribly restrictive. The judgement about
21126 whether or not each character is 'printable' (and can be output as
21127 is) or not (and must be printed with an octal escape) must be made
21128 with reference to the *host* character set -- the situation is
21129 similar to that discussed in the comments above pp_c_char in
21130 c-pretty-print.cc. */
21132 #define MAX_ASCII_LEN 51
21135 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
21138 int len_so_far
= 0;
21140 fputs ("\t.ascii\t\"", stream
);
21142 for (i
= 0; i
< len
; i
++)
21146 if (len_so_far
>= MAX_ASCII_LEN
)
21148 fputs ("\"\n\t.ascii\t\"", stream
);
21154 if (c
== '\\' || c
== '\"')
21156 putc ('\\', stream
);
21164 fprintf (stream
, "\\%03o", c
);
21169 fputs ("\"\n", stream
);
21173 /* Compute the register save mask for registers 0 through 12
21174 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21176 static unsigned long
21177 arm_compute_save_reg0_reg12_mask (void)
21179 unsigned long func_type
= arm_current_func_type ();
21180 unsigned long save_reg_mask
= 0;
21183 if (IS_INTERRUPT (func_type
))
21185 unsigned int max_reg
;
21186 /* Interrupt functions must not corrupt any registers,
21187 even call clobbered ones. If this is a leaf function
21188 we can just examine the registers used by the RTL, but
21189 otherwise we have to assume that whatever function is
21190 called might clobber anything, and so we have to save
21191 all the call-clobbered registers as well. */
21192 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
21193 /* FIQ handlers have registers r8 - r12 banked, so
21194 we only need to check r0 - r7, Normal ISRs only
21195 bank r14 and r15, so we must check up to r12.
21196 r13 is the stack pointer which is always preserved,
21197 so we do not need to consider it here. */
21202 for (reg
= 0; reg
<= max_reg
; reg
++)
21203 if (reg_needs_saving_p (reg
))
21204 save_reg_mask
|= (1 << reg
);
21206 /* Also save the pic base register if necessary. */
21207 if (PIC_REGISTER_MAY_NEED_SAVING
21208 && crtl
->uses_pic_offset_table
)
21209 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21211 else if (IS_VOLATILE(func_type
))
21213 /* For noreturn functions we historically omitted register saves
21214 altogether. However this really messes up debugging. As a
21215 compromise save just the frame pointers. Combined with the link
21216 register saved elsewhere this should be sufficient to get
21218 if (frame_pointer_needed
)
21219 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21220 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
21221 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21222 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
21223 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
21227 /* In the normal case we only need to save those registers
21228 which are call saved and which are used by this function. */
21229 for (reg
= 0; reg
<= 11; reg
++)
21230 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21231 save_reg_mask
|= (1 << reg
);
21233 /* Handle the frame pointer as a special case. */
21234 if (frame_pointer_needed
)
21235 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21237 /* If we aren't loading the PIC register,
21238 don't stack it even though it may be live. */
21239 if (PIC_REGISTER_MAY_NEED_SAVING
21240 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
21241 || crtl
->uses_pic_offset_table
))
21242 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21244 /* The prologue will copy SP into R0, so save it. */
21245 if (IS_STACKALIGN (func_type
))
21246 save_reg_mask
|= 1;
21249 /* Save registers so the exception handler can modify them. */
21250 if (crtl
->calls_eh_return
)
21256 reg
= EH_RETURN_DATA_REGNO (i
);
21257 if (reg
== INVALID_REGNUM
)
21259 save_reg_mask
|= 1 << reg
;
21263 return save_reg_mask
;
21266 /* Return true if r3 is live at the start of the function. */
21269 arm_r3_live_at_start_p (void)
21271 /* Just look at cfg info, which is still close enough to correct at this
21272 point. This gives false positives for broken functions that might use
21273 uninitialized data that happens to be allocated in r3, but who cares? */
21274 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
21277 /* Compute the number of bytes used to store the static chain register on the
21278 stack, above the stack frame. We need to know this accurately to get the
21279 alignment of the rest of the stack frame correct. */
21282 arm_compute_static_chain_stack_bytes (void)
21284 /* Once the value is updated from the init value of -1, do not
21286 if (cfun
->machine
->static_chain_stack_bytes
!= -1)
21287 return cfun
->machine
->static_chain_stack_bytes
;
21289 /* See the defining assertion in arm_expand_prologue. */
21290 if (IS_NESTED (arm_current_func_type ())
21291 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21292 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21293 || flag_stack_clash_protection
)
21294 && !df_regs_ever_live_p (LR_REGNUM
)))
21295 && arm_r3_live_at_start_p ()
21296 && crtl
->args
.pretend_args_size
== 0)
/* NOTE(review): damaged extraction -- the original file's line numbers are
   fused into the text and several statements/braces are missing (gaps in the
   embedded numbering).  Code kept byte-identical; comments only added.
   Purpose (from the surviving comments): build the bit mask of core
   registers that must be saved in the prologue of the current function.  */
21302 /* Compute a bit mask of which core registers need to be
21303 saved on the stack for the current function.
21304 This is used by arm_compute_frame_layout, which may add extra registers. */
21306 static unsigned long
21307 arm_compute_save_core_reg_mask (void)
21309 unsigned int save_reg_mask
= 0;
21310 unsigned long func_type
= arm_current_func_type ();
/* Naked functions manage their own prologue/epilogue: save nothing.  */
21313 if (IS_NAKED (func_type
))
21314 /* This should never really happen. */
21317 /* If we are creating a stack frame, then we must save the frame pointer,
21318 IP (which will hold the old stack pointer), LR and the PC. */
21319 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21321 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
21324 | (1 << PC_REGNUM
);
21326 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
/* With return-address signing (PAC), IP holds the authentication code
   and must be preserved.  */
21328 if (arm_current_function_pac_enabled_p ())
21329 save_reg_mask
|= 1 << IP_REGNUM
;
21331 /* Decide if we need to save the link register.
21332 Interrupt routines have their own banked link register,
21333 so they never need to save it.
21334 Otherwise if we do not use the link register we do not need to save
21335 it. If we are pushing other registers onto the stack however, we
21336 can save an instruction in the epilogue by pushing the link register
21337 now and then popping it back into the PC. This incurs extra memory
21338 accesses though, so we only do it when optimizing for size, and only
21339 if we know that we will not need a fancy return sequence. */
21340 if (df_regs_ever_live_p (LR_REGNUM
)
21343 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
21344 && !crtl
->tail_call_emit
21345 && !crtl
->calls_eh_return
))
21346 save_reg_mask
|= 1 << LR_REGNUM
;
21348 if (cfun
->machine
->lr_save_eliminated
)
21349 save_reg_mask
&= ~ (1 << LR_REGNUM
);
/* iWMMXt targets need an even register count so the stack stays
   64-bit aligned before iWMMXt saves and locals.  */
21351 if (TARGET_REALLY_IWMMXT
21352 && ((bit_count (save_reg_mask
)
21353 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
21354 arm_compute_static_chain_stack_bytes())
21357 /* The total number of registers that are going to be pushed
21358 onto the stack is odd. We need to ensure that the stack
21359 is 64-bit aligned before we start to save iWMMXt registers,
21360 and also before we start to create locals. (A local variable
21361 might be a double or long long which we will load/store using
21362 an iWMMXt instruction). Therefore we need to push another
21363 ARM register, so that the stack will be 64-bit aligned. We
21364 try to avoid using the arg registers (r0 -r3) as they might be
21365 used to pass values in a tail call. */
21366 for (reg
= 4; reg
<= 12; reg
++)
21367 if ((save_reg_mask
& (1 << reg
)) == 0)
21371 save_reg_mask
|= (1 << reg
);
/* Fallback: no free high register -- use r3 and forbid sibcalls,
   since r3 may carry a tail-call argument.  */
21374 cfun
->machine
->sibcall_blocked
= 1;
21375 save_reg_mask
|= (1 << 3);
21379 /* We may need to push an additional register for use initializing the
21380 PIC base register. */
21381 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
21382 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
21384 reg
= thumb_find_work_register (1 << 4);
21385 if (!call_used_or_fixed_reg_p (reg
))
21386 save_reg_mask
|= (1 << reg
);
21389 return save_reg_mask
;
/* NOTE(review): damaged extraction -- statements/braces are missing (e.g.
   the initialization of MASK and the head of the PIC condition).  Code kept
   byte-identical; comments only added.  Purpose: Thumb-1 variant of the
   save-register mask computation.  */
21392 /* Compute a bit mask of which core registers need to be
21393 saved on the stack for the current function. */
21394 static unsigned long
21395 thumb1_compute_save_core_reg_mask (void)
21397 unsigned long mask
;
/* Scan r0..r11 for live callee-saved registers.  */
21401 for (reg
= 0; reg
< 12; reg
++)
21402 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21405 /* Handle the frame pointer as a special case. */
21406 if (frame_pointer_needed
)
21407 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
/* NOTE(review): the leading condition of this PIC test was dropped by the
   extraction; the surviving clauses save the PIC base register.  */
21410 && !TARGET_SINGLE_PIC_BASE
21411 && arm_pic_register
!= INVALID_REGNUM
21412 && crtl
->uses_pic_offset_table
)
21413 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21415 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21416 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
21417 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21419 /* LR will also be pushed if any lo regs are pushed. */
21420 if (mask
& 0xff || thumb_force_lr_save ())
21421 mask
|= (1 << LR_REGNUM
);
21423 bool call_clobbered_scratch
21424 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21425 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21427 /* Make sure we have a low work register if we need one. We will
21428 need one if we are going to push a high register, but we are not
21429 currently intending to push a low register. However if both the
21430 prologue and epilogue have a spare call-clobbered low register,
21431 then we won't need to find an additional work register. It does
21432 not need to be the same register in the prologue and
21434 if ((mask
& 0xff) == 0
21435 && !call_clobbered_scratch
21436 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
21438 /* Use thumb_find_work_register to choose which register
21439 we will use. If the register is live then we will
21440 have to push it. Use LAST_LO_REGNUM as our fallback
21441 choice for the register to select. */
21442 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
21443 /* Make sure the register returned by thumb_find_work_register is
21444 not part of the return value. */
21445 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
21446 reg
= LAST_LO_REGNUM
;
21448 if (callee_saved_reg_p (reg
))
21452 /* The 504 below is 8 bytes less than 512 because there are two possible
21453 alignment words. We can't tell here if they will be present or not so we
21454 have to play it safe and assume that they are. */
21455 if ((CALLER_INTERWORKING_SLOT_SIZE
+
21456 ROUND_UP_WORD (get_frame_size ()) +
21457 crtl
->outgoing_args_size
) >= 504)
21459 /* This is the same as the code in thumb1_expand_prologue() which
21460 determines which register to use for stack decrement. */
21461 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
21462 if (mask
& (1 << reg
))
21465 if (reg
> LAST_LO_REGNUM
)
21467 /* Make sure we have a register available for stack decrement. */
21468 mask
|= 1 << LAST_LO_REGNUM
;
/* NOTE(review): damaged extraction -- declarations of COUNT/SAVED, the loop
   body, and the return statement are missing.  Code kept byte-identical;
   comments only added.  Purpose: byte count needed to save VFP registers.  */
21475 /* Return the number of bytes required to save VFP registers. */
21477 arm_get_vfp_saved_size (void)
21479 unsigned int regno
;
21484 /* Space for saved VFP registers. */
21485 if (TARGET_VFP_BASE
)
/* VFP registers are considered in pairs (D registers): both halves
   must be dead for the pair to be skipped.  */
21488 for (regno
= FIRST_VFP_REGNUM
;
21489 regno
< LAST_VFP_REGNUM
;
21492 if (!reg_needs_saving_p (regno
) && !reg_needs_saving_p (regno
+ 1))
21496 /* Workaround ARM10 VFPr1 bug. */
21497 if (count
== 2 && !arm_arch6
)
21499 saved
+= count
* 8;
/* NOTE(review): the duplicated tail below handles the final run of
   consecutive saved registers after the loop.  */
21508 if (count
== 2 && !arm_arch6
)
21510 saved
+= count
* 8;
/* NOTE(review): damaged extraction -- numerous statements, braces, case
   labels and string continuations are missing.  Code kept byte-identical;
   comments only added.  Purpose: emit the textual assembly for a function
   return (pop of saved registers plus the return instruction proper),
   honoring interrupt/exception/CMSE/interworking variants.  */
21517 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21518 everything bar the final return instruction. If simple_return is true,
21519 then do not output epilogue, because it has already been emitted in RTL.
21521 Note: do not forget to update length attribute of corresponding insn pattern
21522 when changing assembly output (eg. length attribute of
21523 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21524 register clearing sequences). */
21526 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
21527 bool simple_return
)
21529 char conditional
[10];
21532 unsigned long live_regs_mask
;
21533 unsigned long func_type
;
21534 arm_stack_offsets
*offsets
;
21536 func_type
= arm_current_func_type ();
21538 if (IS_NAKED (func_type
))
21541 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
21543 /* If this function was declared non-returning, and we have
21544 found a tail call, then we have to trust that the called
21545 function won't return. */
21550 /* Otherwise, trap an attempted return by aborting. */
21552 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
21554 assemble_external_libcall (ops
[1]);
21555 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
21561 gcc_assert (!cfun
->calls_alloca
|| really_return
);
/* Build the condition suffix ("%?%d0" or "%?%D0") once; reused by
   every instruction printed below.  */
21563 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
21565 cfun
->machine
->return_used_this_function
= 1;
21567 offsets
= arm_get_frame_offsets ();
21568 live_regs_mask
= offsets
->saved_regs_mask
;
21570 if (!simple_return
&& live_regs_mask
)
21572 const char * return_reg
;
21574 /* If we do not have any special requirements for function exit
21575 (e.g. interworking) then we can load the return address
21576 directly into the PC. Otherwise we must load it into LR. */
21578 && !IS_CMSE_ENTRY (func_type
)
21579 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
21580 return_reg
= reg_names
[PC_REGNUM
];
21582 return_reg
= reg_names
[LR_REGNUM
];
21584 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
21586 /* There are three possible reasons for the IP register
21587 being saved. 1) a stack frame was created, in which case
21588 IP contains the old stack pointer, or 2) an ISR routine
21589 corrupted it, or 3) it was saved to align the stack on
21590 iWMMXt. In case 1, restore IP into SP, otherwise just
21592 if (frame_pointer_needed
)
21594 live_regs_mask
&= ~ (1 << IP_REGNUM
);
21595 live_regs_mask
|= (1 << SP_REGNUM
);
21598 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
21601 /* On some ARM architectures it is faster to use LDR rather than
21602 LDM to load a single register. On other architectures, the
21603 cost is the same. In 26 bit mode, or for exception handlers,
21604 we have to use LDM to load the PC so that the CPSR is also
21606 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
21607 if (live_regs_mask
== (1U << reg
))
21610 if (reg
<= LAST_ARM_REGNUM
21611 && (reg
!= LR_REGNUM
21613 || ! IS_INTERRUPT (func_type
)))
/* Single-register restore: a plain post-indexed LDR.  */
21615 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
21616 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
21623 /* Generate the load multiple instruction to restore the
21624 registers. Note we can get here, even if
21625 frame_pointer_needed is true, but only if sp already
21626 points to the base of the saved core registers. */
21627 if (live_regs_mask
& (1 << SP_REGNUM
))
21629 unsigned HOST_WIDE_INT stack_adjust
;
21631 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
21632 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
21634 if (stack_adjust
&& arm_arch5t
&& TARGET_ARM
)
21635 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
21638 /* If we can't use ldmib (SA110 bug),
21639 then try to pop r3 instead. */
21641 live_regs_mask
|= 1 << 3;
21643 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
21646 /* For interrupt returns we have to use an LDM rather than
21647 a POP so that we can use the exception return variant. */
21648 else if (IS_INTERRUPT (func_type
))
21649 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
21651 sprintf (instr
, "pop%s\t{", conditional
);
/* Append the register list to the mnemonic built above.  */
21653 p
= instr
+ strlen (instr
);
21655 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
21656 if (live_regs_mask
& (1 << reg
))
21658 int l
= strlen (reg_names
[reg
]);
21664 memcpy (p
, ", ", 2);
21668 memcpy (p
, "%|", 2);
21669 memcpy (p
+ 2, reg_names
[reg
], l
);
21673 if (live_regs_mask
& (1 << LR_REGNUM
))
21675 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
21676 /* If returning from an interrupt, restore the CPSR. */
21677 if (IS_INTERRUPT (func_type
))
21684 output_asm_insn (instr
, & operand
);
21686 /* See if we need to generate an extra instruction to
21687 perform the actual function return. */
21689 && func_type
!= ARM_FT_INTERWORKED
21690 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
21692 /* The return has already been handled
21693 by loading the LR into the PC. */
/* Select the return instruction by function type.  */
21700 switch ((int) ARM_FUNC_TYPE (func_type
))
21704 /* ??? This is wrong for unified assembly syntax. */
21705 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
21708 case ARM_FT_INTERWORKED
:
21709 gcc_assert (arm_arch5t
|| arm_arch4t
);
21710 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21713 case ARM_FT_EXCEPTION
:
21714 /* ??? This is wrong for unified assembly syntax. */
21715 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
/* CMSE non-secure entry: scrub state before the BXNS return.  */
21719 if (IS_CMSE_ENTRY (func_type
))
21721 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21722 emitted by cmse_nonsecure_entry_clear_before_return () and the
21723 VSTR/VLDR instructions in the prologue and epilogue. */
21724 if (!TARGET_HAVE_FPCXT_CMSE
)
21726 /* Check if we have to clear the 'GE bits' which is only used if
21727 parallel add and subtraction instructions are available. */
21728 if (TARGET_INT_SIMD
)
21729 snprintf (instr
, sizeof (instr
),
21730 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
21732 snprintf (instr
, sizeof (instr
),
21733 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
21735 output_asm_insn (instr
, & operand
);
21736 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21738 if (TARGET_HARD_FLOAT
)
21740 /* Clear the cumulative exception-status bits (0-4,7) and
21741 the condition code bits (28-31) of the FPSCR. We need
21742 to remember to clear the first scratch register used
21743 (IP) and save and restore the second (r4).
21745 Important note: the length of the
21746 thumb2_cmse_entry_return insn pattern must account for
21747 the size of the below instructions. */
21748 output_asm_insn ("push\t{%|r4}", & operand
);
21749 output_asm_insn ("vmrs\t%|ip, fpscr", & operand
);
21750 output_asm_insn ("movw\t%|r4, #65376", & operand
);
21751 output_asm_insn ("movt\t%|r4, #4095", & operand
);
21752 output_asm_insn ("and\t%|ip, %|r4", & operand
);
21753 output_asm_insn ("vmsr\tfpscr, %|ip", & operand
);
21754 output_asm_insn ("pop\t{%|r4}", & operand
);
21755 output_asm_insn ("mov\t%|ip, %|lr", & operand
);
21758 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
21760 /* Use bx if it's available. */
21761 else if (arm_arch5t
|| arm_arch4t
)
21762 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21764 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
21768 output_asm_insn (instr
, & operand
);
/* NOTE(review): damaged extraction -- return type, braces and some
   statements are missing.  Code kept byte-identical; comments only added.
   Purpose: emit the assembler directives declaring a function's name,
   including the extra "__acle_se_"-prefixed label for CMSE entry points.  */
21774 /* Output in FILE asm statements needed to declare the NAME of the function
21775 defined by its DECL node. */
21778 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
21780 size_t cmse_name_len
;
21781 char *cmse_name
= 0;
21782 char cmse_prefix
[] = "__acle_se_";
21784 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21785 extra function label for each function with the 'cmse_nonsecure_entry'
21786 attribute. This extra function label should be prepended with
21787 '__acle_se_', telling the linker that it needs to create secure gateway
21788 veneers for this function. */
21789 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
21790 DECL_ATTRIBUTES (decl
)))
/* sizeof (cmse_prefix) already includes the NUL terminator, so this
   length covers prefix + name + NUL.  */
21792 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
21793 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
21794 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
21795 targetm
.asm_out
.globalize_label (file
, cmse_name
);
21797 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
21798 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
21801 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
21802 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21803 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21804 ASM_OUTPUT_LABEL (file
, name
);
/* The secure-gateway label is emitted at the same address as NAME.  */
21807 ASM_OUTPUT_LABEL (file
, cmse_name
);
21809 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
21812 /* Write the function name into the code section, directly preceding
21813 the function prologue.
21815 Code will be output similar to this:
21817 .ascii "arm_poke_function_name", 0
21820 .word 0xff000000 + (t1 - t0)
21821 arm_poke_function_name
21823 stmfd sp!, {fp, ip, lr, pc}
21826 When performing a stack backtrace, code can inspect the value
21827 of 'pc' stored at 'fp' + 0. If the trace function then looks
21828 at location pc - 12 and the top 8 bits are set, then we know
21829 that there is a function name embedded immediately preceding this
21830 location and has length ((pc[-3]) & 0xff000000).
21832 We assume that pc is declared as a pointer to an unsigned long.
21834 It is of no benefit to output the function name if we are assembling
21835 a leaf function. These function types will not contain a stack
21836 backtrace structure, therefore it is not possible to determine the
21839 arm_poke_function_name (FILE *stream
, const char *name
)
21841 unsigned long alignlength
;
21842 unsigned long length
;
21845 length
= strlen (name
) + 1;
21846 alignlength
= ROUND_UP_WORD (length
);
21848 ASM_OUTPUT_ASCII (stream
, name
, length
);
21849 ASM_OUTPUT_ALIGN (stream
, 2);
21850 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
21851 assemble_aligned_integer (UNITS_PER_WORD
, x
);
/* NOTE(review): damaged extraction -- braces, some case labels and break
   statements are missing.  Code kept byte-identical; comments only added.
   Purpose: write descriptive assembler comments (function type, frame
   sizes, flags) ahead of the prologue.  */
21854 /* Place some comments into the assembler stream
21855 describing the current function. */
21857 arm_output_function_prologue (FILE *f
)
21859 unsigned long func_type
;
21861 /* Sanity check. */
21862 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
21864 func_type
= arm_current_func_type ();
/* One descriptive comment per function category.  */
21866 switch ((int) ARM_FUNC_TYPE (func_type
))
21869 case ARM_FT_NORMAL
:
21871 case ARM_FT_INTERWORKED
:
21872 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
21875 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
21878 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
21880 case ARM_FT_EXCEPTION
:
21881 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
/* Orthogonal attribute flags, each reported independently.  */
21885 if (IS_NAKED (func_type
))
21886 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21888 if (IS_VOLATILE (func_type
))
21889 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
21891 if (IS_NESTED (func_type
))
21892 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
21893 if (IS_STACKALIGN (func_type
))
21894 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21895 if (IS_CMSE_ENTRY (func_type
))
21896 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
21898 asm_fprintf (f
, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21899 (HOST_WIDE_INT
) crtl
->args
.size
,
21900 crtl
->args
.pretend_args_size
,
21901 (HOST_WIDE_INT
) get_frame_size ());
21903 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21904 frame_pointer_needed
,
21905 cfun
->machine
->uses_anonymous_args
);
21907 if (cfun
->machine
->lr_save_eliminated
)
21908 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
21910 if (crtl
->calls_eh_return
)
21911 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* NOTE(review): damaged extraction -- return type, braces and the TARGET_THUMB
   branch head are missing.  Code kept byte-identical; comments only added.
   Purpose: post-epilogue bookkeeping; emits v4t call-via-reg trampolines and
   sanity-checks the frame layout for 32-bit targets.  */
21916 arm_output_function_epilogue (FILE *)
21918 arm_stack_offsets
*offsets
;
21924 /* Emit any call-via-reg trampolines that are needed for v4t support
21925 of call_reg and call_value_reg type insns. */
21926 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21928 rtx label
= cfun
->machine
->call_via
[regno
];
21932 switch_to_section (function_section (current_function_decl
));
21933 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21934 CODE_LABEL_NUMBER (label
));
21935 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21939 /* ??? Probably not safe to set this here, since it assumes that a
21940 function will be emitted as assembly immediately after we generate
21941 RTL for it. This does not happen for inline functions. */
21942 cfun
->machine
->return_used_this_function
= 0;
21944 else /* TARGET_32BIT */
21946 /* We need to take into account any stack-frame rounding. */
21947 offsets
= arm_get_frame_offsets ();
21949 gcc_assert (!use_return_insn (FALSE
, NULL
)
21950 || (cfun
->machine
->return_used_this_function
!= 0)
21951 || offsets
->saved_regs
== offsets
->outgoing_args
21952 || frame_pointer_needed
);
/* NOTE(review): damaged extraction -- declarations (i, regno, regno2, tmp),
   several operands of gen_frame_mem/plus_constant calls, loop increments and
   braces are missing.  Code kept byte-identical; comments only added.
   Purpose: emit a PUSH-equivalent prologue sequence using STR/STRD, with a
   parallel DWARF SEQUENCE describing each store and the SP adjustment.  */
21956 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21957 STR and STRD. If an even number of registers are being pushed, one
21958 or more STRD patterns are created for each register pair. If an
21959 odd number of registers are pushed, emit an initial STR followed by
21960 as many STRD instructions as are needed. This works best when the
21961 stack is initially 64-bit aligned (the normal case), since it
21962 ensures that each STRD is also 64-bit aligned. */
21964 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
21969 rtx par
= NULL_RTX
;
21970 rtx dwarf
= NULL_RTX
;
21974 num_regs
= bit_count (saved_regs_mask
);
21976 /* Must be at least one register to save, and can't save SP or PC. */
21977 gcc_assert (num_regs
> 0 && num_regs
<= 14);
21978 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
21979 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
21981 /* Create sequence for DWARF info. All the frame-related data for
21982 debugging is held in this wrapper. */
21983 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
21985 /* Describe the stack adjustment. */
21986 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21987 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
21988 RTX_FRAME_RELATED_P (tmp
) = 1;
21989 XVECEXP (dwarf
, 0, 0) = tmp
;
21991 /* Find the first register. */
21992 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
21997 /* If there's an odd number of registers to push. Start off by
21998 pushing a single register. This ensures that subsequent strd
21999 operations are dword aligned (assuming that SP was originally
22000 64-bit aligned). */
22001 if ((num_regs
& 1) != 0)
22003 rtx reg
, mem
, insn
;
22005 reg
= gen_rtx_REG (SImode
, regno
);
/* First alternative: pre-decrement store of the lone register.  */
22007 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
22008 stack_pointer_rtx
));
22010 mem
= gen_frame_mem (Pmode
,
22012 (Pmode
, stack_pointer_rtx
,
22013 plus_constant (Pmode
, stack_pointer_rtx
,
22016 tmp
= gen_rtx_SET (mem
, reg
);
22017 RTX_FRAME_RELATED_P (tmp
) = 1;
22018 insn
= emit_insn (tmp
);
22019 RTX_FRAME_RELATED_P (insn
) = 1;
22020 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22021 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
22022 RTX_FRAME_RELATED_P (tmp
) = 1;
22025 XVECEXP (dwarf
, 0, i
) = tmp
;
/* Remaining registers are pushed two at a time with STRD.  */
22029 while (i
< num_regs
)
22030 if (saved_regs_mask
& (1 << regno
))
22032 rtx reg1
, reg2
, mem1
, mem2
;
22033 rtx tmp0
, tmp1
, tmp2
;
22036 /* Find the register to pair with this one. */
22037 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
22041 reg1
= gen_rtx_REG (SImode
, regno
);
22042 reg2
= gen_rtx_REG (SImode
, regno2
);
/* First pair also performs the whole SP decrement (writeback).  */
22049 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22052 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22054 -4 * (num_regs
- 1)));
22055 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
22056 plus_constant (Pmode
, stack_pointer_rtx
,
22058 tmp1
= gen_rtx_SET (mem1
, reg1
);
22059 tmp2
= gen_rtx_SET (mem2
, reg2
);
22060 RTX_FRAME_RELATED_P (tmp0
) = 1;
22061 RTX_FRAME_RELATED_P (tmp1
) = 1;
22062 RTX_FRAME_RELATED_P (tmp2
) = 1;
22063 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
22064 XVECEXP (par
, 0, 0) = tmp0
;
22065 XVECEXP (par
, 0, 1) = tmp1
;
22066 XVECEXP (par
, 0, 2) = tmp2
;
22067 insn
= emit_insn (par
);
22068 RTX_FRAME_RELATED_P (insn
) = 1;
22069 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
/* Subsequent pairs use plain offset addressing (no writeback).  */
22073 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22076 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
22079 tmp1
= gen_rtx_SET (mem1
, reg1
);
22080 tmp2
= gen_rtx_SET (mem2
, reg2
);
22081 RTX_FRAME_RELATED_P (tmp1
) = 1;
22082 RTX_FRAME_RELATED_P (tmp2
) = 1;
22083 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22084 XVECEXP (par
, 0, 0) = tmp1
;
22085 XVECEXP (par
, 0, 1) = tmp2
;
22089 /* Create unwind information. This is an approximation. */
22090 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
22091 plus_constant (Pmode
,
22095 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
22096 plus_constant (Pmode
,
22101 RTX_FRAME_RELATED_P (tmp1
) = 1;
22102 RTX_FRAME_RELATED_P (tmp2
) = 1;
22103 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
22104 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
22106 regno
= regno2
+ 1;
/* NOTE(review): damaged extraction -- declarations (num_regs, offset, tmp,
   mem), several plus_constant operands, loop increments and braces are
   missing.  Code kept byte-identical; comments only added.
   Purpose: ARM-mode prologue store sequence preferring STRD for consecutive
   register pairs, falling back to single STRs; first store allocates the
   whole save area via SP writeback.  */
22114 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22115 whenever possible, otherwise it emits single-word stores. The first store
22116 also allocates stack space for all saved registers, using writeback with
22117 post-addressing mode. All other stores use offset addressing. If no STRD
22118 can be emitted, this function emits a sequence of single-word stores,
22119 and not an STM as before, because single-word stores provide more freedom
22120 scheduling and can be turned into an STM by peephole optimizations. */
22122 arm_emit_strd_push (unsigned long saved_regs_mask
)
22125 int i
, j
, dwarf_index
= 0;
22127 rtx dwarf
= NULL_RTX
;
22128 rtx insn
= NULL_RTX
;
22131 /* TODO: A more efficient code can be emitted by changing the
22132 layout, e.g., first push all pairs that can use STRD to keep the
22133 stack aligned, and then push all other registers. */
22134 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22135 if (saved_regs_mask
& (1 << i
))
22138 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
22139 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
22140 gcc_assert (num_regs
> 0);
22142 /* Create sequence for DWARF info. */
22143 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
22145 /* For dwarf info, we generate explicit stack update. */
22146 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22147 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22148 RTX_FRAME_RELATED_P (tmp
) = 1;
22149 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22151 /* Save registers. */
22152 offset
= - 4 * num_regs
;
22154 while (j
<= LAST_ARM_REGNUM
)
22155 if (saved_regs_mask
& (1 << j
))
22158 && (saved_regs_mask
& (1 << (j
+ 1))))
22160 /* Current register and previous register form register pair for
22161 which STRD can be generated. */
22164 /* Allocate stack space for all saved registers. */
22165 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22166 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22167 mem
= gen_frame_mem (DImode
, tmp
);
22170 else if (offset
> 0)
22171 mem
= gen_frame_mem (DImode
,
22172 plus_constant (Pmode
,
22176 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
22178 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
22179 RTX_FRAME_RELATED_P (tmp
) = 1;
22180 tmp
= emit_insn (tmp
);
22182 /* Record the first store insn. */
22183 if (dwarf_index
== 1)
22186 /* Generate dwarf info. */
/* DWARF describes the DImode STRD as two SImode stores.  */
22187 mem
= gen_frame_mem (SImode
,
22188 plus_constant (Pmode
,
22191 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22192 RTX_FRAME_RELATED_P (tmp
) = 1;
22193 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22195 mem
= gen_frame_mem (SImode
,
22196 plus_constant (Pmode
,
22199 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
22200 RTX_FRAME_RELATED_P (tmp
) = 1;
22201 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22208 /* Emit a single word store. */
22211 /* Allocate stack space for all saved registers. */
22212 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22213 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22214 mem
= gen_frame_mem (SImode
, tmp
);
22217 else if (offset
> 0)
22218 mem
= gen_frame_mem (SImode
,
22219 plus_constant (Pmode
,
22223 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
22225 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22226 RTX_FRAME_RELATED_P (tmp
) = 1;
22227 tmp
= emit_insn (tmp
);
22229 /* Record the first store insn. */
22230 if (dwarf_index
== 1)
22233 /* Generate dwarf info. */
22234 mem
= gen_frame_mem (SImode
,
22235 plus_constant(Pmode
,
22238 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22239 RTX_FRAME_RELATED_P (tmp
) = 1;
22240 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22249 /* Attach dwarf info to the first insn we generate. */
22250 gcc_assert (insn
!= NULL_RTX
);
22251 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22252 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): damaged extraction -- declarations (num_regs, i, j, par,
   dwarf, reg, tmp), loop increments, some call operands and braces are
   missing.  Code kept byte-identical; comments only added.
   Purpose: emit a push_multi insn (UNSPEC_PUSH_MULT parallel) plus an
   explicit per-register DWARF frame note; substitutes RA_AUTH_CODE for IP
   in the unwind info when PAC is enabled.  */
22255 /* Generate and emit an insn that we will recognize as a push_multi.
22256 Unfortunately, since this insn does not reflect very well the actual
22257 semantics of the operation, we need to annotate the insn for the benefit
22258 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22259 MASK for registers that should be annotated for DWARF2 frame unwind
22262 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
22265 int num_dwarf_regs
= 0;
22269 int dwarf_par_index
;
22272 /* We don't record the PC in the dwarf frame information. */
22273 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
22275 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22277 if (mask
& (1 << i
))
22279 if (dwarf_regs_mask
& (1 << i
))
22283 gcc_assert (num_regs
&& num_regs
<= 16);
22284 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
22286 /* For the body of the insn we are going to generate an UNSPEC in
22287 parallel with several USEs. This allows the insn to be recognized
22288 by the push_multi pattern in the arm.md file.
22290 The body of the insn looks something like this:
22293 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22294 (const_int:SI <num>)))
22295 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22301 For the frame note however, we try to be more explicit and actually
22302 show each register being stored into the stack frame, plus a (single)
22303 decrement of the stack pointer. We do it this way in order to be
22304 friendly to the stack unwinding code, which only wants to see a single
22305 stack decrement per instruction. The RTL we generate for the note looks
22306 something like this:
22309 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22310 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22311 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22312 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22316 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22317 instead we'd have a parallel expression detailing all
22318 the stores to the various memory addresses so that debug
22319 information is more up-to-date. Remember however while writing
22320 this to take care of the constraints with the push instruction.
22322 Note also that this has to be taken care of for the VFP registers.
22324 For more see PR43399. */
22326 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
22327 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
22328 dwarf_par_index
= 1;
/* Find the first pushed register; it carries the SET with the SP
   pre-modify, the remaining registers become USEs.  */
22330 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22332 if (mask
& (1 << i
))
22334 /* NOTE: Dwarf code emitter handle reg-reg copies correctly and in the
22335 following example reg-reg copy of SP to IP register is handled
22336 through .cfi_def_cfa_register directive and the .cfi_offset
22337 directive for IP register is skipped by dwarf code emitter.
22340 .cfi_def_cfa_register 12
22341 push {fp, ip, lr, pc}
22342 .cfi_offset 11, -16
22343 .cfi_offset 13, -12
22346 Where as Arm-specific .save directive handling is different to that
22347 of dwarf code emitter and it doesn't consider reg-reg copies while
22348 updating the register list. When PACBTI is enabled we manually
22349 updated the .save directive register list to use "ra_auth_code"
22350 (pseduo register 143) instead of IP register as shown in following
22354 .cfi_register 143, 12
22355 push {r3, r7, ip, lr}
22356 .save {r3, r7, ra_auth_code, lr}
22358 rtx dwarf_reg
= reg
= gen_rtx_REG (SImode
, i
);
22359 if (arm_current_function_pac_enabled_p () && i
== IP_REGNUM
)
22360 dwarf_reg
= gen_rtx_REG (SImode
, RA_AUTH_CODE
);
22362 XVECEXP (par
, 0, 0)
22363 = gen_rtx_SET (gen_frame_mem
22365 gen_rtx_PRE_MODIFY (Pmode
,
22368 (Pmode
, stack_pointer_rtx
,
22371 gen_rtx_UNSPEC (BLKmode
,
22372 gen_rtvec (1, reg
),
22373 UNSPEC_PUSH_MULT
));
22375 if (dwarf_regs_mask
& (1 << i
))
22377 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
22379 RTX_FRAME_RELATED_P (tmp
) = 1;
22380 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22387 for (j
= 1, i
++; j
< num_regs
; i
++)
22389 if (mask
& (1 << i
))
22391 rtx dwarf_reg
= reg
= gen_rtx_REG (SImode
, i
);
22392 if (arm_current_function_pac_enabled_p () && i
== IP_REGNUM
)
22393 dwarf_reg
= gen_rtx_REG (SImode
, RA_AUTH_CODE
);
22395 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
22397 if (dwarf_regs_mask
& (1 << i
))
22400 = gen_rtx_SET (gen_frame_mem
22402 plus_constant (Pmode
, stack_pointer_rtx
,
22405 RTX_FRAME_RELATED_P (tmp
) = 1;
22406 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22413 par
= emit_insn (par
);
22415 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22416 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22417 RTX_FRAME_RELATED_P (tmp
) = 1;
22418 XVECEXP (dwarf
, 0, 0) = tmp
;
22420 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
22425 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22426 SIZE is the offset to be adjusted.
22427 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22429 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
22433 RTX_FRAME_RELATED_P (insn
) = 1;
22434 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
22435 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  /* If PC is popped, the parallel gets an extra leading (return) element.  */
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	/* When PAC is enabled, the saved IP slot actually holds the
	   authentication code, so the DWARF info must name the
	   RA_AUTH_CODE pseudo register instead of IP.  */
	rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
	if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
	  dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
					      NULL_RTX);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG, with the stack
   update applied to BASE_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug: widen a 2-register transfer so it does
     not hit the erratum.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  SAVED_REGS_MASK gives the set of registers to
   restore; SP must not be in the mask.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
	       [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the
	       registers to be loaded are generated in above given LDRD
	       pattern, and the pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  SP must not be in the mask (it is updated
     separately below).  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	/* LDRD needs an even-numbered first register and its successor
	   also saved; PC can never be an LDRD operand.  */
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped,
	       and we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info: both halves of the pair are restored.  */

	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
22873 /* Calculate the size of the return value that is passed in registers. */
22875 arm_size_return_regs (void)
22879 if (crtl
->return_rtx
!= 0)
22880 mode
= GET_MODE (crtl
->return_rtx
);
22882 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
22884 return GET_MODE_SIZE (mode
);
/* Return true if the current function needs to save/restore LR.
   LR must be saved unless its save was already proved eliminable, and
   it is needed whenever the function is a non-leaf, uses a far jump,
   or LR is live anyway.  */
/* NOTE(review): the middle conjunct (!crtl->is_leaf) is reconstructed from
   upstream GCC sources — confirm against the repository copy.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
22897 /* We do not know if r3 will be available because
22898 we do have an indirect tailcall happening in this
22899 particular case. */
22901 is_indirect_tailcall_p (rtx call
)
22903 rtx pat
= PATTERN (call
);
22905 /* Indirect tail call. */
22906 pat
= XVECEXP (pat
, 0, 0);
22907 if (GET_CODE (pat
) == SET
)
22908 pat
= SET_SRC (pat
);
22910 pat
= XEXP (XEXP (pat
, 0), 0);
22911 return REG_P (pat
);
/* Return true if r3 is used by any of the tail call insns in the
   current function.  Scans every sibcall edge into the exit block and
   checks whether the call either explicitly uses r3 or is indirect
   (in which case r3 may hold the call target).  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  /* No tail calls were emitted, so r3 cannot be used by one.  */
  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	/* The sibcall is the last real insn of the predecessor block,
	   possibly followed by notes/debug insns.  */
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
22988 /* Return cached stack offsets. */
22990 static arm_stack_offsets
*
22991 arm_get_frame_offsets (void)
22993 struct arm_stack_offsets
*offsets
;
22995 offsets
= &cfun
->machine
->stack_offsets
;
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  Results are stored in cfun->machine->stack_offsets.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;			/* Total bytes of saved registers.  */
  int core_saved;		/* Bytes of saved core registers only.  */
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (reg_needs_saving_p (regno))
	      saved += 8;	/* iWMMXt registers are 8 bytes wide.  */
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_VFP_BASE)
	saved += arm_get_vfp_saved_size ();

      /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
	 nonecure entry functions with VSTR/VLDR.  */
      if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
	saved += 4;
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;		/* Backtrace structure.  */
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      /* Found a padding register: save it too.  */
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  FROM and TO are register
   numbers from ELIMINABLE_REGS.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */

	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
23239 /* Given FROM and TO register numbers, say whether this elimination is
23240 allowed. Frame pointer elimination is automatically handled.
23242 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23243 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23244 pointer, we must eliminate FRAME_POINTER_REGNUM into
23245 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23246 ARG_POINTER_REGNUM. */
23249 arm_can_eliminate (const int from
, const int to
)
23251 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
23252 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
23253 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
23254 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  Covers the call-saved iWMMXt registers and the
   call-saved VFP D-registers (saved in maximal contiguous runs via
   vfp_emit_fstmd).  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  if (TARGET_REALLY_IWMMXT)
    for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
      if (reg_needs_saving_p (reg))
	{
	  /* Push one 8-byte iWMMXt register with pre-decrement.  */
	  insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = gen_rtx_MEM (V2SImode, insn);
	  insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  saved_size += 8;
	}

  if (TARGET_VFP_BASE)
    {
      start_reg = FIRST_VFP_REGNUM;

      /* Walk D-register pairs, flushing each contiguous run of registers
	 that need saving as a single FSTMD.  */
      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      /* Flush the final run, if any.  */
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.

   OFFSETS is the current frame layout; the frame pointer is set to
   SP + (outgoing_args - locals_base).  For large offsets the constant is
   materialised first and a REG_FRAME_RELATED_EXPR note is added so the
   DWARF output still describes the simple SP + amount relationship.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  /* NOTE(review): the small-immediate threshold below is reconstructed from
     upstream GCC sources — confirm against the repository copy.  */
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      /* The multi-insn sequence obscures the net effect; describe it
	 explicitly for the unwinder.  */
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
23340 struct scratch_reg
{
/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  The result is stored in *SR; if no dead register
   is available, r2 or r3 is pushed to the stack to free it up.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
			       unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  /* Prefer LR: it is saved in the prologue, so clobbering it is free.  */
  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      /* Otherwise look for any other saved (hence dead-on-entry) low or
	 callee-saved register.  */
      for (i = 4; i < 11; i++)
	if (regno1 != i && (live_regs & (1 << i)) != 0)
	  {
	    regno = i;
	    break;
	  }

      if (regno < 0)
	{
	  /* If IP is used as the 1st scratch register for a nested function,
	     then either r3 wasn't available or is used to preserve IP.  */
	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
	    regno1 = 3;
	  regno = (regno1 == 3 ? 2 : 3);
	  /* The chosen register may be live into the function; if so it
	     must be spilled around its use as a scratch.  */
	  sr->saved
	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			       regno);
	}
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      /* Push the register and describe the SP adjustment for unwinding.  */
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
/* Release a scratch register obtained from the preceding function.
   If the register had been spilled, pop it back and describe the SP
   adjustment for the unwinder.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
23415 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23417 #if PROBE_INTERVAL > 4096
23418 #error Cannot use indexed addressing mode for stack probing
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.

   Three strategies are used depending on SIZE: a single probe, an
   unrolled sequence of probes, or a run-time probing loop that needs a
   second scratch register.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      /* The remainder may exceed the immediate-offset range of a probe
	 (4095 for ARM, 255 for Thumb-2), in which case step down first.  */
      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
23540 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23541 absolute addresses. */
23544 output_probe_stack_range (rtx reg1
, rtx reg2
)
23546 static int labelno
= 0;
23550 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
23553 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
23555 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23557 xops
[1] = GEN_INT (PROBE_INTERVAL
);
23558 output_asm_insn ("sub\t%0, %0, %1", xops
);
23560 /* Probe at TEST_ADDR. */
23561 output_asm_insn ("str\tr0, [%0, #0]", xops
);
23563 /* Test if TEST_ADDR == LAST_ADDR. */
23565 output_asm_insn ("cmp\t%0, %1", xops
);
23568 fputs ("\tbne\t", asm_out_file
);
23569 assemble_name_raw (asm_out_file
, loop_lab
);
23570 fputc ('\n', asm_out_file
);
23575 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23578 arm_expand_prologue (void)
23583 unsigned long live_regs_mask
;
23584 unsigned long func_type
;
23586 int saved_pretend_args
= 0;
23587 int saved_regs
= 0;
23588 unsigned HOST_WIDE_INT args_to_push
;
23589 HOST_WIDE_INT size
;
23590 arm_stack_offsets
*offsets
;
23593 func_type
= arm_current_func_type ();
23595 /* Naked functions don't have prologues. */
23596 if (IS_NAKED (func_type
))
23598 if (flag_stack_usage_info
)
23599 current_function_static_stack_size
= 0;
23603 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23604 args_to_push
= crtl
->args
.pretend_args_size
;
23606 /* Compute which register we will have to save onto the stack. */
23607 offsets
= arm_get_frame_offsets ();
23608 live_regs_mask
= offsets
->saved_regs_mask
;
23610 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
23612 if (IS_STACKALIGN (func_type
))
23616 /* Handle a word-aligned stack pointer. We generate the following:
23621 <save and restore r0 in normal prologue/epilogue>
23625 The unwinder doesn't need to know about the stack realignment.
23626 Just tell it we saved SP in r0. */
23627 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
23629 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
23630 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
23632 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
23633 RTX_FRAME_RELATED_P (insn
) = 1;
23634 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
23636 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
23638 /* ??? The CFA changes here, which may cause GDB to conclude that it
23639 has entered a different function. That said, the unwind info is
23640 correct, individually, before and after this instruction because
23641 we've described the save of SP, which will override the default
23642 handling of SP as restoring from the CFA. */
23643 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
23646 /* Let's compute the static_chain_stack_bytes required and store it. Right
23647 now the value must be -1 as stored by arm_init_machine_status (). */
23648 cfun
->machine
->static_chain_stack_bytes
23649 = arm_compute_static_chain_stack_bytes ();
23651 /* The static chain register is the same as the IP register. If it is
23652 clobbered when creating the frame, we need to save and restore it. */
23653 clobber_ip
= (IS_NESTED (func_type
)
23654 && (((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23655 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23656 || flag_stack_clash_protection
)
23657 && !df_regs_ever_live_p (LR_REGNUM
)
23658 && arm_r3_live_at_start_p ()))
23659 || arm_current_function_pac_enabled_p ()));
23661 /* Find somewhere to store IP whilst the frame is being created.
23662 We try the following places in order:
23664 1. The last argument register r3 if it is available.
23665 2. A slot on the stack above the frame if there are no
23666 arguments to push onto the stack.
23667 3. Register r3 again, after pushing the argument registers
23668 onto the stack, if this is a varargs function.
23669 4. The last slot on the stack created for the arguments to
23670 push, if this isn't a varargs function.
23672 Note - we only need to tell the dwarf2 backend about the SP
23673 adjustment in the second variant; the static chain register
23674 doesn't need to be unwound, as it doesn't contain a value
23675 inherited from the caller. */
23678 if (!arm_r3_live_at_start_p ())
23679 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23680 else if (args_to_push
== 0)
23686 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23687 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23690 /* Just tell the dwarf backend that we adjusted SP. */
23691 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23692 plus_constant (Pmode
, stack_pointer_rtx
,
23694 RTX_FRAME_RELATED_P (insn
) = 1;
23695 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23696 if (arm_current_function_pac_enabled_p ())
23697 cfun
->machine
->pacspval_needed
= 1;
23701 /* Store the args on the stack. */
23702 if (cfun
->machine
->uses_anonymous_args
)
23704 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23705 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23706 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23707 saved_pretend_args
= 1;
23713 if (args_to_push
== 4)
23714 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23716 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
23717 plus_constant (Pmode
,
23721 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23723 /* Just tell the dwarf backend that we adjusted SP. */
23724 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23725 plus_constant (Pmode
, stack_pointer_rtx
,
23727 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23730 RTX_FRAME_RELATED_P (insn
) = 1;
23731 fp_offset
= args_to_push
;
23733 if (arm_current_function_pac_enabled_p ())
23734 cfun
->machine
->pacspval_needed
= 1;
23738 if (arm_current_function_pac_enabled_p ())
23740 /* If IP was clobbered we only emit a PAC instruction as the BTI
23741 one will be added before the push of the clobbered IP (if
23742 necessary) by the bti pass. */
23743 if (aarch_bti_enabled () && !clobber_ip
)
23744 insn
= emit_insn (gen_pacbti_nop ());
23746 insn
= emit_insn (gen_pac_nop ());
23748 rtx dwarf
= gen_rtx_SET (ip_rtx
, gen_rtx_REG (SImode
, RA_AUTH_CODE
));
23749 RTX_FRAME_RELATED_P (insn
) = 1;
23750 add_reg_note (insn
, REG_CFA_REGISTER
, dwarf
);
23753 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23755 if (IS_INTERRUPT (func_type
))
23757 /* Interrupt functions must not corrupt any registers.
23758 Creating a frame pointer however, corrupts the IP
23759 register, so we must push it first. */
23760 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
23762 /* Do not set RTX_FRAME_RELATED_P on this insn.
23763 The dwarf stack unwinding code only wants to see one
23764 stack decrement per function, and this is not it. If
23765 this instruction is labeled as being part of the frame
23766 creation sequence then dwarf2out_frame_debug_expr will
23767 die when it encounters the assignment of IP to FP
23768 later on, since the use of SP here establishes SP as
23769 the CFA register and not IP.
23771 Anyway this instruction is not really part of the stack
23772 frame creation although it is part of the prologue. */
23775 insn
= emit_set_insn (ip_rtx
,
23776 plus_constant (Pmode
, stack_pointer_rtx
,
23778 RTX_FRAME_RELATED_P (insn
) = 1;
23781 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23782 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
23785 insn
= emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx
,
23786 GEN_INT (FPCXTNS_ENUM
)));
23787 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23788 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23789 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23790 RTX_FRAME_RELATED_P (insn
) = 1;
23795 /* Push the argument registers, or reserve space for them. */
23796 if (cfun
->machine
->uses_anonymous_args
)
23797 insn
= emit_multi_reg_push
23798 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23799 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23802 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23803 GEN_INT (- args_to_push
)));
23804 RTX_FRAME_RELATED_P (insn
) = 1;
23807 /* If this is an interrupt service routine, and the link register
23808 is going to be pushed, and we're not generating extra
23809 push of IP (needed when frame is needed and frame layout if apcs),
23810 subtracting four from LR now will mean that the function return
23811 can be done with a single instruction. */
23812 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
23813 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
23814 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
23817 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
23819 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
23822 if (live_regs_mask
)
23824 unsigned long dwarf_regs_mask
= live_regs_mask
;
23826 saved_regs
+= bit_count (live_regs_mask
) * 4;
23827 if (optimize_size
&& !frame_pointer_needed
23828 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
23830 /* If no coprocessor registers are being pushed and we don't have
23831 to worry about a frame pointer then push extra registers to
23832 create the stack frame. This is done in a way that does not
23833 alter the frame layout, so is independent of the epilogue. */
23837 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
23839 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
23840 if (frame
&& n
* 4 >= frame
)
23843 live_regs_mask
|= (1 << n
) - 1;
23844 saved_regs
+= frame
;
23849 && current_tune
->prefer_ldrd_strd
23850 && !optimize_function_for_size_p (cfun
))
23852 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
23854 thumb2_emit_strd_push (live_regs_mask
);
23855 else if (TARGET_ARM
23856 && !TARGET_APCS_FRAME
23857 && !IS_INTERRUPT (func_type
))
23858 arm_emit_strd_push (live_regs_mask
);
23861 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
23862 RTX_FRAME_RELATED_P (insn
) = 1;
23867 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
23868 RTX_FRAME_RELATED_P (insn
) = 1;
23872 if (! IS_VOLATILE (func_type
))
23873 saved_regs
+= arm_save_coproc_regs ();
23875 if (frame_pointer_needed
&& TARGET_ARM
)
23877 /* Create the new frame pointer. */
23878 if (TARGET_APCS_FRAME
)
23880 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
23881 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
23882 RTX_FRAME_RELATED_P (insn
) = 1;
23886 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
23887 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23888 stack_pointer_rtx
, insn
));
23889 RTX_FRAME_RELATED_P (insn
) = 1;
23893 size
= offsets
->outgoing_args
- offsets
->saved_args
;
23894 if (flag_stack_usage_info
)
23895 current_function_static_stack_size
= size
;
23897 /* If this isn't an interrupt service routine and we have a frame, then do
23898 stack checking. We use IP as the first scratch register, except for the
23899 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23900 if (!IS_INTERRUPT (func_type
)
23901 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23902 || flag_stack_clash_protection
))
23904 unsigned int regno
;
23906 if (!IS_NESTED (func_type
) || clobber_ip
)
23908 else if (df_regs_ever_live_p (LR_REGNUM
))
23913 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
23915 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
23916 arm_emit_probe_stack_range (get_stack_check_protect (),
23917 size
- get_stack_check_protect (),
23918 regno
, live_regs_mask
);
23921 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
23922 regno
, live_regs_mask
);
23925 /* Recover the static chain register. */
23928 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
23929 insn
= gen_rtx_REG (SImode
, 3);
23932 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
23933 insn
= gen_frame_mem (SImode
, insn
);
23935 emit_set_insn (ip_rtx
, insn
);
23936 emit_insn (gen_force_register_use (ip_rtx
));
23939 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
23941 /* This add can produce multiple insns for a large constant, so we
23942 need to get tricky. */
23943 rtx_insn
*last
= get_last_insn ();
23945 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
23946 - offsets
->outgoing_args
);
23948 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23952 last
= last
? NEXT_INSN (last
) : get_insns ();
23953 RTX_FRAME_RELATED_P (last
) = 1;
23955 while (last
!= insn
);
23957 /* If the frame pointer is needed, emit a special barrier that
23958 will prevent the scheduler from moving stores to the frame
23959 before the stack adjustment. */
23960 if (frame_pointer_needed
)
23961 emit_insn (gen_stack_tie (stack_pointer_rtx
,
23962 hard_frame_pointer_rtx
));
23966 if (frame_pointer_needed
&& TARGET_THUMB2
)
23967 thumb_set_frame_pointer (offsets
);
23969 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23971 unsigned long mask
;
23973 mask
= live_regs_mask
;
23974 mask
&= THUMB2_WORK_REGS
;
23975 if (!IS_NESTED (func_type
))
23976 mask
|= (1 << IP_REGNUM
);
23977 arm_load_pic_register (mask
, NULL_RTX
);
23980 /* If we are profiling, make sure no instructions are scheduled before
23981 the call to mcount. Similarly if the user has requested no
23982 scheduling in the prolog. Similarly if we want non-call exceptions
23983 using the EABI unwinder, to prevent faulting instructions from being
23984 swapped with a stack adjustment. */
23985 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
23986 || (arm_except_unwind_info (&global_options
) == UI_TARGET
23987 && cfun
->can_throw_non_call_exceptions
))
23988 emit_insn (gen_blockage ());
23990 /* If the link register is being kept alive, with the return address in it,
23991 then make sure that it does not get reused by the ce2 pass. */
23992 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
23993 cfun
->machine
->lr_save_eliminated
= 1;
23996 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23998 arm_print_condition (FILE *stream
)
24000 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
24002 /* Branch conversion is not implemented for Thumb-2. */
24005 output_operand_lossage ("predicated Thumb instruction");
24008 if (current_insn_predicate
!= NULL
)
24010 output_operand_lossage
24011 ("predicated instruction in conditional sequence");
24015 fputs (arm_condition_codes
[arm_current_cc
], stream
);
24017 else if (current_insn_predicate
)
24019 enum arm_cond_code code
;
24023 output_operand_lossage ("predicated Thumb instruction");
24027 code
= get_arm_condition_code (current_insn_predicate
);
24028 fputs (arm_condition_codes
[code
], stream
);
24033 /* Globally reserved letters: acln
24034 Puncutation letters currently used: @_|?().!#
24035 Lower case letters currently used: bcdefhimpqtvwxyz
24036 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24037 Letters previously used, but now deprecated/obsolete: sWXYZ.
24039 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24041 If CODE is 'd', then the X is a condition operand and the instruction
24042 should only be executed if the condition is true.
24043 if CODE is 'D', then the X is a condition operand and the instruction
24044 should only be executed if the condition is false: however, if the mode
24045 of the comparison is CCFPEmode, then always execute the instruction -- we
24046 do this because in these circumstances !GE does not necessarily imply LT;
24047 in these cases the instruction pattern will take care to make sure that
24048 an instruction containing %d will follow, thereby undoing the effects of
24049 doing this instruction unconditionally.
24050 If CODE is 'N' then X is a floating point operand that must be negated
24052 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24053 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24054 If CODE is 'V', then the operand must be a CONST_INT representing
24055 the bits to preserve in the modified register (Rd) of a BFI or BFC
24056 instruction: print out both the width and lsb (shift) fields. */
24058 arm_print_operand (FILE *stream
, rtx x
, int code
)
24063 fputs (ASM_COMMENT_START
, stream
);
24067 fputs (user_label_prefix
, stream
);
24071 fputs (REGISTER_PREFIX
, stream
);
24075 arm_print_condition (stream
);
24079 /* The current condition code for a condition code setting instruction.
24080 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24081 fputc('s', stream
);
24082 arm_print_condition (stream
);
24086 /* If the instruction is conditionally executed then print
24087 the current condition code, otherwise print 's'. */
24088 gcc_assert (TARGET_THUMB2
);
24089 if (current_insn_predicate
)
24090 arm_print_condition (stream
);
24092 fputc('s', stream
);
24095 /* %# is a "break" sequence. It doesn't output anything, but is used to
24096 separate e.g. operand numbers from following text, if that text consists
24097 of further digits which we don't want to be part of the operand
24105 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
24106 fprintf (stream
, "%s", fp_const_from_val (&r
));
24110 /* An integer or symbol address without a preceding # sign. */
24112 switch (GET_CODE (x
))
24115 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
24119 output_addr_const (stream
, x
);
24123 if (GET_CODE (XEXP (x
, 0)) == PLUS
24124 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
24126 output_addr_const (stream
, x
);
24129 /* Fall through. */
24132 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24136 /* An integer that we want to print in HEX. */
24138 switch (GET_CODE (x
))
24141 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
24145 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24150 if (CONST_INT_P (x
))
24153 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
24154 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
24158 putc ('~', stream
);
24159 output_addr_const (stream
, x
);
24164 /* Print the log2 of a CONST_INT. */
24168 if (!CONST_INT_P (x
)
24169 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
24170 output_operand_lossage ("Unsupported operand for code '%c'", code
);
24172 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24177 /* The low 16 bits of an immediate constant. */
24178 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
24182 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
24186 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
24194 shift
= shift_op (x
, &val
);
24198 fprintf (stream
, ", %s ", shift
);
24200 arm_print_operand (stream
, XEXP (x
, 1), 0);
24202 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24207 /* An explanation of the 'Q', 'R' and 'H' register operands:
24209 In a pair of registers containing a DI or DF value the 'Q'
24210 operand returns the register number of the register containing
24211 the least significant part of the value. The 'R' operand returns
24212 the register number of the register containing the most
24213 significant part of the value.
24215 The 'H' operand returns the higher of the two register numbers.
24216 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24217 same as the 'Q' operand, since the most significant part of the
24218 value is held in the lower number register. The reverse is true
24219 on systems where WORDS_BIG_ENDIAN is false.
24221 The purpose of these operands is to distinguish between cases
24222 where the endian-ness of the values is important (for example
24223 when they are added together), and cases where the endian-ness
24224 is irrelevant, but the order of register operations is important.
24225 For example when loading a value from memory into a register
24226 pair, the endian-ness does not matter. Provided that the value
24227 from the lower memory address is put into the lower numbered
24228 register, and the value from the higher address is put into the
24229 higher numbered register, the load will work regardless of whether
24230 the value being loaded is big-wordian or little-wordian. The
24231 order of the two register loads can matter however, if the address
24232 of the memory location is actually held in one of the registers
24233 being overwritten by the load.
24235 The 'Q' and 'R' constraints are also available for 64-bit
24238 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24240 rtx part
= gen_lowpart (SImode
, x
);
24241 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24245 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24247 output_operand_lossage ("invalid operand for code '%c'", code
);
24251 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
24255 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24257 machine_mode mode
= GET_MODE (x
);
24260 if (mode
== VOIDmode
)
24262 part
= gen_highpart_mode (SImode
, mode
, x
);
24263 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24267 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24269 output_operand_lossage ("invalid operand for code '%c'", code
);
24273 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
24277 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24279 output_operand_lossage ("invalid operand for code '%c'", code
);
24283 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
24287 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24289 output_operand_lossage ("invalid operand for code '%c'", code
);
24293 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
24297 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24299 output_operand_lossage ("invalid operand for code '%c'", code
);
24303 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
24307 asm_fprintf (stream
, "%r",
24308 REG_P (XEXP (x
, 0))
24309 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
24313 asm_fprintf (stream
, "{%r-%r}",
24315 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
24318 /* Like 'M', but writing doubleword vector registers, for use by Neon
24322 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
24323 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
24325 asm_fprintf (stream
, "{d%d}", regno
);
24327 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
24332 /* CONST_TRUE_RTX means always -- that's the default. */
24333 if (x
== const_true_rtx
)
24336 if (!COMPARISON_P (x
))
24338 output_operand_lossage ("invalid operand for code '%c'", code
);
24342 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
24347 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24348 want to do that. */
24349 if (x
== const_true_rtx
)
24351 output_operand_lossage ("instruction never executed");
24354 if (!COMPARISON_P (x
))
24356 output_operand_lossage ("invalid operand for code '%c'", code
);
24360 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
24361 (get_arm_condition_code (x
))],
24367 /* Output the LSB (shift) and width for a bitmask instruction
24368 based on a literal mask. The LSB is printed first,
24369 followed by the width.
24371 Eg. For 0b1...1110001, the result is #1, #3. */
24372 if (!CONST_INT_P (x
))
24374 output_operand_lossage ("invalid operand for code '%c'", code
);
24378 unsigned HOST_WIDE_INT val
24379 = ~UINTVAL (x
) & HOST_WIDE_INT_UC (0xffffffff);
24380 int lsb
= exact_log2 (val
& -val
);
24381 asm_fprintf (stream
, "#%d, #%d", lsb
,
24382 (exact_log2 (val
+ (val
& -val
)) - lsb
));
24391 /* Former Maverick support, removed after GCC-4.7. */
24392 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
24397 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
24398 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
24399 /* Bad value for wCG register number. */
24401 output_operand_lossage ("invalid operand for code '%c'", code
);
24406 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
24409 /* Print an iWMMXt control register name. */
24411 if (!CONST_INT_P (x
)
24413 || INTVAL (x
) >= 16)
24414 /* Bad value for wC register number. */
24416 output_operand_lossage ("invalid operand for code '%c'", code
);
24422 static const char * wc_reg_names
[16] =
24424 "wCID", "wCon", "wCSSF", "wCASF",
24425 "wC4", "wC5", "wC6", "wC7",
24426 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24427 "wC12", "wC13", "wC14", "wC15"
24430 fputs (wc_reg_names
[INTVAL (x
)], stream
);
24434 /* Print the high single-precision register of a VFP double-precision
24438 machine_mode mode
= GET_MODE (x
);
24441 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
24443 output_operand_lossage ("invalid operand for code '%c'", code
);
24448 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
24450 output_operand_lossage ("invalid operand for code '%c'", code
);
24454 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
24458 /* Print a VFP/Neon double precision or quad precision register name. */
24462 machine_mode mode
= GET_MODE (x
);
24463 int is_quad
= (code
== 'q');
24466 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
24468 output_operand_lossage ("invalid operand for code '%c'", code
);
24473 || !IS_VFP_REGNUM (REGNO (x
)))
24475 output_operand_lossage ("invalid operand for code '%c'", code
);
24480 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
24481 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
24483 output_operand_lossage ("invalid operand for code '%c'", code
);
24487 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
24488 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
24492 /* These two codes print the low/high doubleword register of a Neon quad
24493 register, respectively. For pair-structure types, can also print
24494 low/high quadword registers. */
24498 machine_mode mode
= GET_MODE (x
);
24501 if ((GET_MODE_SIZE (mode
) != 16
24502 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
24504 output_operand_lossage ("invalid operand for code '%c'", code
);
24509 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
24511 output_operand_lossage ("invalid operand for code '%c'", code
);
24515 if (GET_MODE_SIZE (mode
) == 16)
24516 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
24517 + (code
== 'f' ? 1 : 0));
24519 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
24520 + (code
== 'f' ? 1 : 0));
24524 /* Print a VFPv3 floating-point constant, represented as an integer
24528 int index
= vfp3_const_double_index (x
);
24529 gcc_assert (index
!= -1);
24530 fprintf (stream
, "%d", index
);
24534 /* Print bits representing opcode features for Neon.
24536 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24537 and polynomials as unsigned.
24539 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24541 Bit 2 is 1 for rounding functions, 0 otherwise. */
24543 /* Identify the type as 's', 'u', 'p' or 'f'. */
24546 HOST_WIDE_INT bits
= INTVAL (x
);
24547 fputc ("uspf"[bits
& 3], stream
);
24551 /* Likewise, but signed and unsigned integers are both 'i'. */
24554 HOST_WIDE_INT bits
= INTVAL (x
);
24555 fputc ("iipf"[bits
& 3], stream
);
24559 /* As for 'T', but emit 'u' instead of 'p'. */
24562 HOST_WIDE_INT bits
= INTVAL (x
);
24563 fputc ("usuf"[bits
& 3], stream
);
24567 /* Bit 2: rounding (vs none). */
24570 HOST_WIDE_INT bits
= INTVAL (x
);
24571 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
24575 /* Memory operand for vld1/vst1 instruction. */
24579 bool postinc
= FALSE
;
24580 rtx postinc_reg
= NULL
;
24581 unsigned align
, memsize
, align_bits
;
24583 gcc_assert (MEM_P (x
));
24584 addr
= XEXP (x
, 0);
24585 if (GET_CODE (addr
) == POST_INC
)
24588 addr
= XEXP (addr
, 0);
24590 if (GET_CODE (addr
) == POST_MODIFY
)
24592 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
24593 addr
= XEXP (addr
, 0);
24595 asm_fprintf (stream
, "[%r", REGNO (addr
));
24597 /* We know the alignment of this access, so we can emit a hint in the
24598 instruction (for some alignments) as an aid to the memory subsystem
24600 align
= MEM_ALIGN (x
) >> 3;
24601 memsize
= MEM_SIZE (x
);
24603 /* Only certain alignment specifiers are supported by the hardware. */
24604 if (memsize
== 32 && (align
% 32) == 0)
24606 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
24608 else if (memsize
>= 8 && (align
% 8) == 0)
24613 if (align_bits
!= 0)
24614 asm_fprintf (stream
, ":%d", align_bits
);
24616 asm_fprintf (stream
, "]");
24619 fputs("!", stream
);
24621 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
24625 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24626 rtx_code the memory operands output looks like following.
24628 2. [Rn, #+/-<imm>]!
24634 rtx postinc_reg
= NULL
;
24635 unsigned inc_val
= 0;
24636 enum rtx_code code
;
24638 gcc_assert (MEM_P (x
));
24639 addr
= XEXP (x
, 0);
24640 code
= GET_CODE (addr
);
24641 if (code
== POST_INC
|| code
== POST_DEC
|| code
== PRE_INC
24642 || code
== PRE_DEC
)
24644 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24645 inc_val
= GET_MODE_SIZE (GET_MODE (x
));
24646 if (code
== POST_INC
|| code
== POST_DEC
)
24647 asm_fprintf (stream
, "], #%s%d",(code
== POST_INC
)
24648 ? "": "-", inc_val
);
24650 asm_fprintf (stream
, ", #%s%d]!",(code
== PRE_INC
)
24651 ? "": "-", inc_val
);
24653 else if (code
== POST_MODIFY
|| code
== PRE_MODIFY
)
24655 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24656 postinc_reg
= XEXP (XEXP (addr
, 1), 1);
24657 if (postinc_reg
&& CONST_INT_P (postinc_reg
))
24659 if (code
== POST_MODIFY
)
24660 asm_fprintf (stream
, "], #%wd",INTVAL (postinc_reg
));
24662 asm_fprintf (stream
, ", #%wd]!",INTVAL (postinc_reg
));
24665 else if (code
== PLUS
)
24667 rtx base
= XEXP (addr
, 0);
24668 rtx index
= XEXP (addr
, 1);
24670 gcc_assert (REG_P (base
) && CONST_INT_P (index
));
24672 HOST_WIDE_INT offset
= INTVAL (index
);
24673 asm_fprintf (stream
, "[%r, #%wd]", REGNO (base
), offset
);
24677 gcc_assert (REG_P (addr
));
24678 asm_fprintf (stream
, "[%r]",REGNO (addr
));
24687 gcc_assert (MEM_P (x
));
24688 addr
= XEXP (x
, 0);
24689 gcc_assert (REG_P (addr
));
24690 asm_fprintf (stream
, "[%r]", REGNO (addr
));
24694 /* Translate an S register number into a D register number and element index. */
24697 machine_mode mode
= GET_MODE (x
);
24700 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
24702 output_operand_lossage ("invalid operand for code '%c'", code
);
24707 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24709 output_operand_lossage ("invalid operand for code '%c'", code
);
24713 regno
= regno
- FIRST_VFP_REGNUM
;
24714 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
24719 gcc_assert (CONST_DOUBLE_P (x
));
24721 result
= vfp3_const_double_for_fract_bits (x
);
24723 result
= vfp3_const_double_for_bits (x
);
24724 fprintf (stream
, "#%d", result
);
24727 /* Register specifier for vld1.16/vst1.16. Translate the S register
24728 number into a D register number and element index. */
24731 machine_mode mode
= GET_MODE (x
);
24734 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
24736 output_operand_lossage ("invalid operand for code '%c'", code
);
24741 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24743 output_operand_lossage ("invalid operand for code '%c'", code
);
24747 regno
= regno
- FIRST_VFP_REGNUM
;
24748 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
24755 output_operand_lossage ("missing operand");
24759 switch (GET_CODE (x
))
24762 asm_fprintf (stream
, "%r", REGNO (x
));
24766 output_address (GET_MODE (x
), XEXP (x
, 0));
24772 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
24773 sizeof (fpstr
), 0, 1);
24774 fprintf (stream
, "#%s", fpstr
);
24779 gcc_assert (GET_CODE (x
) != NEG
);
24780 fputc ('#', stream
);
24781 if (GET_CODE (x
) == HIGH
)
24783 fputs (":lower16:", stream
);
24787 output_addr_const (stream
, x
);
24793 /* Target hook for printing a memory address. */
24795 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
24799 int is_minus
= GET_CODE (x
) == MINUS
;
24802 asm_fprintf (stream
, "[%r]", REGNO (x
));
24803 else if (GET_CODE (x
) == PLUS
|| is_minus
)
24805 rtx base
= XEXP (x
, 0);
24806 rtx index
= XEXP (x
, 1);
24807 HOST_WIDE_INT offset
= 0;
24809 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
24811 /* Ensure that BASE is a register. */
24812 /* (one of them must be). */
24813 /* Also ensure the SP is not used as in index register. */
24814 std::swap (base
, index
);
24816 switch (GET_CODE (index
))
24819 offset
= INTVAL (index
);
24822 asm_fprintf (stream
, "[%r, #%wd]",
24823 REGNO (base
), offset
);
24827 asm_fprintf (stream
, "[%r, %s%r]",
24828 REGNO (base
), is_minus
? "-" : "",
24838 asm_fprintf (stream
, "[%r, %s%r",
24839 REGNO (base
), is_minus
? "-" : "",
24840 REGNO (XEXP (index
, 0)));
24841 arm_print_operand (stream
, index
, 'S');
24842 fputs ("]", stream
);
24847 gcc_unreachable ();
24850 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
24851 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
24853 gcc_assert (REG_P (XEXP (x
, 0)));
24855 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
24856 asm_fprintf (stream
, "[%r, #%s%d]!",
24857 REGNO (XEXP (x
, 0)),
24858 GET_CODE (x
) == PRE_DEC
? "-" : "",
24859 GET_MODE_SIZE (mode
));
24860 else if (TARGET_HAVE_MVE
&& (mode
== OImode
|| mode
== XImode
))
24861 asm_fprintf (stream
, "[%r]!", REGNO (XEXP (x
,0)));
24863 asm_fprintf (stream
, "[%r], #%s%d", REGNO (XEXP (x
, 0)),
24864 GET_CODE (x
) == POST_DEC
? "-" : "",
24865 GET_MODE_SIZE (mode
));
24867 else if (GET_CODE (x
) == PRE_MODIFY
)
24869 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
24870 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24871 asm_fprintf (stream
, "#%wd]!",
24872 INTVAL (XEXP (XEXP (x
, 1), 1)));
24874 asm_fprintf (stream
, "%r]!",
24875 REGNO (XEXP (XEXP (x
, 1), 1)));
24877 else if (GET_CODE (x
) == POST_MODIFY
)
24879 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
24880 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24881 asm_fprintf (stream
, "#%wd",
24882 INTVAL (XEXP (XEXP (x
, 1), 1)));
24884 asm_fprintf (stream
, "%r",
24885 REGNO (XEXP (XEXP (x
, 1), 1)));
24887 else output_addr_const (stream
, x
);
24892 asm_fprintf (stream
, "[%r]", REGNO (x
));
24893 else if (GET_CODE (x
) == POST_INC
)
24894 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
24895 else if (GET_CODE (x
) == PLUS
)
24897 gcc_assert (REG_P (XEXP (x
, 0)));
24898 if (CONST_INT_P (XEXP (x
, 1)))
24899 asm_fprintf (stream
, "[%r, #%wd]",
24900 REGNO (XEXP (x
, 0)),
24901 INTVAL (XEXP (x
, 1)));
24903 asm_fprintf (stream
, "[%r, %r]",
24904 REGNO (XEXP (x
, 0)),
24905 REGNO (XEXP (x
, 1)));
24908 output_addr_const (stream
, x
);
24912 /* Target hook for indicating whether a punctuation character for
24913 TARGET_PRINT_OPERAND is valid. */
24915 arm_print_operand_punct_valid_p (unsigned char code
)
24917 return (code
== '@' || code
== '|' || code
== '.'
24918 || code
== '(' || code
== ')' || code
== '#'
24919 || (TARGET_32BIT
&& (code
== '?'))
24920 || (TARGET_THUMB2
&& (code
== '!'))
24921 || (TARGET_THUMB
&& (code
== '_')));
24924 /* Target hook for assembling integer objects. The ARM version needs to
24925 handle word-sized values specially. */
24927 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
24931 if (size
== UNITS_PER_WORD
&& aligned_p
)
24933 fputs ("\t.word\t", asm_out_file
);
24934 output_addr_const (asm_out_file
, x
);
24936 /* Mark symbols as position independent. We only do this in the
24937 .text segment, not in the .data segment. */
24938 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
24939 (SYMBOL_REF_P (x
) || LABEL_REF_P (x
)))
24941 /* See legitimize_pic_address for an explanation of the
24942 TARGET_VXWORKS_RTP check. */
24943 /* References to weak symbols cannot be resolved locally:
24944 they may be overridden by a non-weak definition at link
24946 if (!arm_pic_data_is_text_relative
24947 || (SYMBOL_REF_P (x
)
24948 && (!SYMBOL_REF_LOCAL_P (x
)
24949 || (SYMBOL_REF_DECL (x
)
24950 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0)
24951 || (SYMBOL_REF_FUNCTION_P (x
)
24952 && !arm_fdpic_local_funcdesc_p (x
)))))
24954 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24955 fputs ("(GOTFUNCDESC)", asm_out_file
);
24957 fputs ("(GOT)", asm_out_file
);
24961 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24962 fputs ("(GOTOFFFUNCDESC)", asm_out_file
);
24968 || arm_is_segment_info_known (x
, &is_readonly
))
24969 fputs ("(GOTOFF)", asm_out_file
);
24971 fputs ("(GOT)", asm_out_file
);
24976 /* For FDPIC we also have to mark symbol for .data section. */
24978 && !making_const_table
24979 && SYMBOL_REF_P (x
)
24980 && SYMBOL_REF_FUNCTION_P (x
))
24981 fputs ("(FUNCDESC)", asm_out_file
);
24983 fputc ('\n', asm_out_file
);
24987 mode
= GET_MODE (x
);
24989 if (arm_vector_mode_supported_p (mode
))
24993 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
24995 units
= CONST_VECTOR_NUNITS (x
);
24996 size
= GET_MODE_UNIT_SIZE (mode
);
24998 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
24999 for (i
= 0; i
< units
; i
++)
25001 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25003 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
25006 for (i
= 0; i
< units
; i
++)
25008 rtx elt
= CONST_VECTOR_ELT (x
, i
);
25010 (*CONST_DOUBLE_REAL_VALUE (elt
),
25011 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
25012 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
25018 return default_assemble_integer (x
, size
, aligned_p
);
25022 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
25026 if (!TARGET_AAPCS_BASED
)
25029 default_named_section_asm_out_constructor
25030 : default_named_section_asm_out_destructor
) (symbol
, priority
);
25034 /* Put these in the .init_array section, using a special relocation. */
25035 if (priority
!= DEFAULT_INIT_PRIORITY
)
25038 sprintf (buf
, "%s.%.5u",
25039 is_ctor
? ".init_array" : ".fini_array",
25041 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
25048 switch_to_section (s
);
25049 assemble_align (POINTER_SIZE
);
25050 fputs ("\t.word\t", asm_out_file
);
25051 output_addr_const (asm_out_file
, symbol
);
25052 fputs ("(target1)\n", asm_out_file
);
25055 /* Add a function to the list of static constructors. */
25058 arm_elf_asm_constructor (rtx symbol
, int priority
)
25060 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
25063 /* Add a function to the list of static destructors. */
25066 arm_elf_asm_destructor (rtx symbol
, int priority
)
25068 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
25071 /* A finite state machine takes care of noticing whether or not instructions
25072 can be conditionally executed, and thus decrease execution time and code
25073 size by deleting branch instructions. The fsm is controlled by
25074 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25076 /* The state of the fsm controlling condition codes are:
25077 0: normal, do nothing special
25078 1: make ASM_OUTPUT_OPCODE not output this instruction
25079 2: make ASM_OUTPUT_OPCODE not output this instruction
25080 3: make instructions conditional
25081 4: make instructions conditional
25083 State transitions (state->state by whom under condition):
25084 0 -> 1 final_prescan_insn if the `target' is a label
25085 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25086 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25087 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25088 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25089 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25090 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25091 (the target insn is arm_target_insn).
25093 If the jump clobbers the conditions then we use states 2 and 4.
25095 A similar thing can be done with conditional return insns.
25097 XXX In case the `target' is an unconditional branch, this conditionalising
25098 of the instructions always reduces code size, but not always execution
25099 time. But then, I want to reduce the code size to somewhere near what
25100 /bin/cc produces. */
25102 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25103 instructions. When a COND_EXEC instruction is seen the subsequent
25104 instructions are scanned so that multiple conditional instructions can be
25105 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25106 specify the length and true/false mask for the IT block. These will be
25107 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
25109 /* Returns the index of the ARM condition code string in
25110 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25111 COMPARISON should be an rtx like `(eq (...) (...))'. */
25114 maybe_get_arm_condition_code (rtx comparison
)
25116 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
25117 enum arm_cond_code code
;
25118 enum rtx_code comp_code
= GET_CODE (comparison
);
25120 if (GET_MODE_CLASS (mode
) != MODE_CC
)
25121 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
25122 XEXP (comparison
, 1));
25126 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
25127 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
25128 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
25129 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
25130 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
25131 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
25132 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
25133 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
25134 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
25135 case E_CC_DLTUmode
: code
= ARM_CC
;
25138 if (comp_code
== EQ
)
25139 return ARM_INVERSE_CONDITION_CODE (code
);
25140 if (comp_code
== NE
)
25147 case NE
: return ARM_NE
;
25148 case EQ
: return ARM_EQ
;
25149 case GE
: return ARM_PL
;
25150 case LT
: return ARM_MI
;
25151 default: return ARM_NV
;
25157 case NE
: return ARM_NE
;
25158 case EQ
: return ARM_EQ
;
25159 default: return ARM_NV
;
25165 case NE
: return ARM_MI
;
25166 case EQ
: return ARM_PL
;
25167 default: return ARM_NV
;
25172 /* We can handle all cases except UNEQ and LTGT. */
25175 case GE
: return ARM_GE
;
25176 case GT
: return ARM_GT
;
25177 case LE
: return ARM_LS
;
25178 case LT
: return ARM_MI
;
25179 case NE
: return ARM_NE
;
25180 case EQ
: return ARM_EQ
;
25181 case ORDERED
: return ARM_VC
;
25182 case UNORDERED
: return ARM_VS
;
25183 case UNLT
: return ARM_LT
;
25184 case UNLE
: return ARM_LE
;
25185 case UNGT
: return ARM_HI
;
25186 case UNGE
: return ARM_PL
;
25187 /* UNEQ and LTGT do not have a representation. */
25188 case UNEQ
: /* Fall through. */
25189 case LTGT
: /* Fall through. */
25190 default: return ARM_NV
;
25196 case NE
: return ARM_NE
;
25197 case EQ
: return ARM_EQ
;
25198 case GE
: return ARM_LE
;
25199 case GT
: return ARM_LT
;
25200 case LE
: return ARM_GE
;
25201 case LT
: return ARM_GT
;
25202 case GEU
: return ARM_LS
;
25203 case GTU
: return ARM_CC
;
25204 case LEU
: return ARM_CS
;
25205 case LTU
: return ARM_HI
;
25206 default: return ARM_NV
;
25212 case LTU
: return ARM_CS
;
25213 case GEU
: return ARM_CC
;
25214 default: return ARM_NV
;
25220 case GE
: return ARM_GE
;
25221 case LT
: return ARM_LT
;
25222 default: return ARM_NV
;
25228 case GEU
: return ARM_CS
;
25229 case LTU
: return ARM_CC
;
25230 default: return ARM_NV
;
25236 case NE
: return ARM_VS
;
25237 case EQ
: return ARM_VC
;
25238 default: return ARM_NV
;
25244 case GEU
: return ARM_CS
;
25245 case LTU
: return ARM_CC
;
25246 default: return ARM_NV
;
25253 case NE
: return ARM_NE
;
25254 case EQ
: return ARM_EQ
;
25255 case GE
: return ARM_GE
;
25256 case GT
: return ARM_GT
;
25257 case LE
: return ARM_LE
;
25258 case LT
: return ARM_LT
;
25259 case GEU
: return ARM_CS
;
25260 case GTU
: return ARM_HI
;
25261 case LEU
: return ARM_LS
;
25262 case LTU
: return ARM_CC
;
25263 default: return ARM_NV
;
25266 default: gcc_unreachable ();
25270 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25271 static enum arm_cond_code
25272 get_arm_condition_code (rtx comparison
)
25274 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
25275 gcc_assert (code
!= ARM_NV
);
25279 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25280 code registers when not targetting Thumb1. The VFP condition register
25281 only exists when generating hard-float code. */
25283 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
25289 *p2
= TARGET_VFP_BASE
? VFPCC_REGNUM
: INVALID_REGNUM
;
25293 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25296 thumb2_final_prescan_insn (rtx_insn
*insn
)
25298 rtx_insn
*first_insn
= insn
;
25299 rtx body
= PATTERN (insn
);
25301 enum arm_cond_code code
;
25306 /* max_insns_skipped in the tune was already taken into account in the
25307 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
25308 just emit the IT blocks as we can. It does not make sense to split
25310 max
= MAX_INSN_PER_IT_BLOCK
;
25312 /* Remove the previous insn from the count of insns to be output. */
25313 if (arm_condexec_count
)
25314 arm_condexec_count
--;
25316 /* Nothing to do if we are already inside a conditional block. */
25317 if (arm_condexec_count
)
25320 if (GET_CODE (body
) != COND_EXEC
)
25323 /* Conditional jumps are implemented directly. */
25327 predicate
= COND_EXEC_TEST (body
);
25328 arm_current_cc
= get_arm_condition_code (predicate
);
25330 n
= get_attr_ce_count (insn
);
25331 arm_condexec_count
= 1;
25332 arm_condexec_mask
= (1 << n
) - 1;
25333 arm_condexec_masklen
= n
;
25334 /* See if subsequent instructions can be combined into the same block. */
25337 insn
= next_nonnote_insn (insn
);
25339 /* Jumping into the middle of an IT block is illegal, so a label or
25340 barrier terminates the block. */
25341 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
25344 body
= PATTERN (insn
);
25345 /* USE and CLOBBER aren't really insns, so just skip them. */
25346 if (GET_CODE (body
) == USE
25347 || GET_CODE (body
) == CLOBBER
)
25350 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25351 if (GET_CODE (body
) != COND_EXEC
)
25353 /* Maximum number of conditionally executed instructions in a block. */
25354 n
= get_attr_ce_count (insn
);
25355 if (arm_condexec_masklen
+ n
> max
)
25358 predicate
= COND_EXEC_TEST (body
);
25359 code
= get_arm_condition_code (predicate
);
25360 mask
= (1 << n
) - 1;
25361 if (arm_current_cc
== code
)
25362 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
25363 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
25366 arm_condexec_count
++;
25367 arm_condexec_masklen
+= n
;
25369 /* A jump must be the last instruction in a conditional block. */
25373 /* Restore recog_data (getting the attributes of other insns can
25374 destroy this array, but final.cc assumes that it remains intact
25375 across this call). */
25376 extract_constrain_insn_cached (first_insn
);
25380 arm_final_prescan_insn (rtx_insn
*insn
)
25382 /* BODY will hold the body of INSN. */
25383 rtx body
= PATTERN (insn
);
25385 /* This will be 1 if trying to repeat the trick, and things need to be
25386 reversed if it appears to fail. */
25389 /* If we start with a return insn, we only succeed if we find another one. */
25390 int seeking_return
= 0;
25391 enum rtx_code return_code
= UNKNOWN
;
25393 /* START_INSN will hold the insn from where we start looking. This is the
25394 first insn after the following code_label if REVERSE is true. */
25395 rtx_insn
*start_insn
= insn
;
25397 /* If in state 4, check if the target branch is reached, in order to
25398 change back to state 0. */
25399 if (arm_ccfsm_state
== 4)
25401 if (insn
== arm_target_insn
)
25403 arm_target_insn
= NULL
;
25404 arm_ccfsm_state
= 0;
25409 /* If in state 3, it is possible to repeat the trick, if this insn is an
25410 unconditional branch to a label, and immediately following this branch
25411 is the previous target label which is only used once, and the label this
25412 branch jumps to is not too far off. */
25413 if (arm_ccfsm_state
== 3)
25415 if (simplejump_p (insn
))
25417 start_insn
= next_nonnote_insn (start_insn
);
25418 if (BARRIER_P (start_insn
))
25420 /* XXX Isn't this always a barrier? */
25421 start_insn
= next_nonnote_insn (start_insn
);
25423 if (LABEL_P (start_insn
)
25424 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
25425 && LABEL_NUSES (start_insn
) == 1)
25430 else if (ANY_RETURN_P (body
))
25432 start_insn
= next_nonnote_insn (start_insn
);
25433 if (BARRIER_P (start_insn
))
25434 start_insn
= next_nonnote_insn (start_insn
);
25435 if (LABEL_P (start_insn
)
25436 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
25437 && LABEL_NUSES (start_insn
) == 1)
25440 seeking_return
= 1;
25441 return_code
= GET_CODE (body
);
25450 gcc_assert (!arm_ccfsm_state
|| reverse
);
25451 if (!JUMP_P (insn
))
25454 /* This jump might be paralleled with a clobber of the condition codes
25455 the jump should always come first */
25456 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
25457 body
= XVECEXP (body
, 0, 0);
25460 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
25461 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
25464 int fail
= FALSE
, succeed
= FALSE
;
25465 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25466 int then_not_else
= TRUE
;
25467 rtx_insn
*this_insn
= start_insn
;
25470 /* Register the insn jumped to. */
25473 if (!seeking_return
)
25474 label
= XEXP (SET_SRC (body
), 0);
25476 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
25477 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
25478 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
25480 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
25481 then_not_else
= FALSE
;
25483 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
25485 seeking_return
= 1;
25486 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
25488 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
25490 seeking_return
= 1;
25491 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
25492 then_not_else
= FALSE
;
25495 gcc_unreachable ();
25497 /* See how many insns this branch skips, and what kind of insns. If all
25498 insns are okay, and the label or unconditional branch to the same
25499 label is not too far away, succeed. */
25500 for (insns_skipped
= 0;
25501 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
25505 this_insn
= next_nonnote_insn (this_insn
);
25509 switch (GET_CODE (this_insn
))
25512 /* Succeed if it is the target label, otherwise fail since
25513 control falls in from somewhere else. */
25514 if (this_insn
== label
)
25516 arm_ccfsm_state
= 1;
25524 /* Succeed if the following insn is the target label.
25526 If return insns are used then the last insn in a function
25527 will be a barrier. */
25528 this_insn
= next_nonnote_insn (this_insn
);
25529 if (this_insn
&& this_insn
== label
)
25531 arm_ccfsm_state
= 1;
25539 /* The AAPCS says that conditional calls should not be
25540 used since they make interworking inefficient (the
25541 linker can't transform BL<cond> into BLX). That's
25542 only a problem if the machine has BLX. */
25549 /* Succeed if the following insn is the target label, or
25550 if the following two insns are a barrier and the
25552 this_insn
= next_nonnote_insn (this_insn
);
25553 if (this_insn
&& BARRIER_P (this_insn
))
25554 this_insn
= next_nonnote_insn (this_insn
);
25556 if (this_insn
&& this_insn
== label
25557 && insns_skipped
< max_insns_skipped
)
25559 arm_ccfsm_state
= 1;
25567 /* If this is an unconditional branch to the same label, succeed.
25568 If it is to another label, do nothing. If it is conditional,
25570 /* XXX Probably, the tests for SET and the PC are
25573 scanbody
= PATTERN (this_insn
);
25574 if (GET_CODE (scanbody
) == SET
25575 && GET_CODE (SET_DEST (scanbody
)) == PC
)
25577 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
25578 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
25580 arm_ccfsm_state
= 2;
25583 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
25586 /* Fail if a conditional return is undesirable (e.g. on a
25587 StrongARM), but still allow this if optimizing for size. */
25588 else if (GET_CODE (scanbody
) == return_code
25589 && !use_return_insn (TRUE
, NULL
)
25592 else if (GET_CODE (scanbody
) == return_code
)
25594 arm_ccfsm_state
= 2;
25597 else if (GET_CODE (scanbody
) == PARALLEL
)
25599 switch (get_attr_conds (this_insn
))
25609 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
25614 /* Instructions using or affecting the condition codes make it
25616 scanbody
= PATTERN (this_insn
);
25617 if (!(GET_CODE (scanbody
) == SET
25618 || GET_CODE (scanbody
) == PARALLEL
)
25619 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
25629 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
25630 arm_target_label
= CODE_LABEL_NUMBER (label
);
25633 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
25635 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
25637 this_insn
= next_nonnote_insn (this_insn
);
25638 gcc_assert (!this_insn
25639 || (!BARRIER_P (this_insn
)
25640 && !LABEL_P (this_insn
)));
25644 /* Oh, dear! we ran off the end.. give up. */
25645 extract_constrain_insn_cached (insn
);
25646 arm_ccfsm_state
= 0;
25647 arm_target_insn
= NULL
;
25650 arm_target_insn
= this_insn
;
25653 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25656 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
25658 if (reverse
|| then_not_else
)
25659 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
25662 /* Restore recog_data (getting the attributes of other insns can
25663 destroy this array, but final.cc assumes that it remains intact
25664 across this call. */
25665 extract_constrain_insn_cached (insn
);
25669 /* Output IT instructions. */
25671 thumb2_asm_output_opcode (FILE * stream
)
25676 if (arm_condexec_mask
)
25678 for (n
= 0; n
< arm_condexec_masklen
; n
++)
25679 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
25681 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
25682 arm_condition_codes
[arm_current_cc
]);
25683 arm_condexec_mask
= 0;
25687 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25688 UNITS_PER_WORD bytes wide. */
25689 static unsigned int
25690 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
25692 if (IS_VPR_REGNUM (regno
))
25693 return CEIL (GET_MODE_SIZE (mode
), 2);
25696 && regno
> PC_REGNUM
25697 && regno
!= FRAME_POINTER_REGNUM
25698 && regno
!= ARG_POINTER_REGNUM
25699 && !IS_VFP_REGNUM (regno
))
25702 return ARM_NUM_REGS (mode
);
25705 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25707 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
25709 if (GET_MODE_CLASS (mode
) == MODE_CC
)
25710 return (regno
== CC_REGNUM
25711 || (TARGET_VFP_BASE
25712 && regno
== VFPCC_REGNUM
));
25714 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
25717 if (IS_VPR_REGNUM (regno
))
25718 return VALID_MVE_PRED_MODE (mode
);
25721 /* For the Thumb we only allow values bigger than SImode in
25722 registers 0 - 6, so that there is always a second low
25723 register available to hold the upper part of the value.
25724 We probably we ought to ensure that the register is the
25725 start of an even numbered register pair. */
25726 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
25728 if (TARGET_VFP_BASE
&& IS_VFP_REGNUM (regno
))
25730 if (mode
== DFmode
|| mode
== DImode
)
25731 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
25733 if (mode
== HFmode
|| mode
== BFmode
|| mode
== HImode
25734 || mode
== SFmode
|| mode
== SImode
)
25735 return VFP_REGNO_OK_FOR_SINGLE (regno
);
25738 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
25739 || (VALID_NEON_QREG_MODE (mode
)
25740 && NEON_REGNO_OK_FOR_QUAD (regno
))
25741 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
25742 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
25743 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
25744 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
25745 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
25746 if (TARGET_HAVE_MVE
)
25747 return ((VALID_MVE_MODE (mode
) && NEON_REGNO_OK_FOR_QUAD (regno
))
25748 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
25749 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8)));
25754 if (TARGET_REALLY_IWMMXT
)
25756 if (IS_IWMMXT_GR_REGNUM (regno
))
25757 return mode
== SImode
;
25759 if (IS_IWMMXT_REGNUM (regno
))
25760 return VALID_IWMMXT_REG_MODE (mode
);
25763 /* We allow almost any value to be stored in the general registers.
25764 Restrict doubleword quantities to even register pairs in ARM state
25765 so that we can use ldrd. The same restriction applies for MVE
25766 in order to support Armv8.1-M Mainline instructions.
25767 Do not allow very large Neon structure opaque modes in general
25768 registers; they would use too many. */
25769 if (regno
<= LAST_ARM_REGNUM
)
25771 if (ARM_NUM_REGS (mode
) > 4)
25774 if (TARGET_THUMB2
&& !(TARGET_HAVE_MVE
|| TARGET_CDE
))
25777 return !((TARGET_LDRD
|| TARGET_CDE
)
25778 && GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
25781 if (regno
== FRAME_POINTER_REGNUM
25782 || regno
== ARG_POINTER_REGNUM
)
25783 /* We only allow integers in the fake hard registers. */
25784 return GET_MODE_CLASS (mode
) == MODE_INT
;
25789 /* Implement TARGET_MODES_TIEABLE_P. */
25792 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
25794 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
25797 if (TARGET_HAVE_MVE
25798 && (VALID_MVE_PRED_MODE (mode1
) && VALID_MVE_PRED_MODE (mode2
)))
25801 /* We specifically want to allow elements of "structure" modes to
25802 be tieable to the structure. This more general condition allows
25803 other rarer situations too. */
25805 && (VALID_NEON_DREG_MODE (mode1
)
25806 || VALID_NEON_QREG_MODE (mode1
)
25807 || VALID_NEON_STRUCT_MODE (mode1
))
25808 && (VALID_NEON_DREG_MODE (mode2
)
25809 || VALID_NEON_QREG_MODE (mode2
)
25810 || VALID_NEON_STRUCT_MODE (mode2
)))
25811 || (TARGET_HAVE_MVE
25812 && (VALID_MVE_MODE (mode1
)
25813 || VALID_MVE_STRUCT_MODE (mode1
))
25814 && (VALID_MVE_MODE (mode2
)
25815 || VALID_MVE_STRUCT_MODE (mode2
))))
25821 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25822 not used in arm mode. */
25825 arm_regno_class (int regno
)
25827 if (regno
== PC_REGNUM
)
25830 if (IS_VPR_REGNUM (regno
))
25833 if (IS_PAC_REGNUM (regno
))
25838 if (regno
== STACK_POINTER_REGNUM
)
25840 if (regno
== CC_REGNUM
)
25847 if (TARGET_THUMB2
&& regno
< 8)
25850 if ( regno
<= LAST_ARM_REGNUM
25851 || regno
== FRAME_POINTER_REGNUM
25852 || regno
== ARG_POINTER_REGNUM
)
25853 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
25855 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
25856 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
25858 if (IS_VFP_REGNUM (regno
))
25860 if (regno
<= D7_VFP_REGNUM
)
25861 return VFP_D0_D7_REGS
;
25862 else if (regno
<= LAST_LO_VFP_REGNUM
)
25863 return VFP_LO_REGS
;
25865 return VFP_HI_REGS
;
25868 if (IS_IWMMXT_REGNUM (regno
))
25869 return IWMMXT_REGS
;
25871 if (IS_IWMMXT_GR_REGNUM (regno
))
25872 return IWMMXT_GR_REGS
;
25877 /* Handle a special case when computing the offset
25878 of an argument from the frame pointer. */
25880 arm_debugger_arg_offset (int value
, rtx addr
)
25884 /* We are only interested if dbxout_parms() failed to compute the offset. */
25888 /* We can only cope with the case where the address is held in a register. */
25892 /* If we are using the frame pointer to point at the argument, then
25893 an offset of 0 is correct. */
25894 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
25897 /* If we are using the stack pointer to point at the
25898 argument, then an offset of 0 is correct. */
25899 /* ??? Check this is consistent with thumb2 frame layout. */
25900 if ((TARGET_THUMB
|| !frame_pointer_needed
)
25901 && REGNO (addr
) == SP_REGNUM
)
25904 /* Oh dear. The argument is pointed to by a register rather
25905 than being held in a register, or being stored at a known
25906 offset from the frame pointer. Since GDB only understands
25907 those two kinds of argument we must translate the address
25908 held in the register into an offset from the frame pointer.
25909 We do this by searching through the insns for the function
25910 looking to see where this register gets its value. If the
25911 register is initialized from the frame pointer plus an offset
25912 then we are in luck and we can continue, otherwise we give up.
25914 This code is exercised by producing debugging information
25915 for a function with arguments like this:
25917 double func (double a, double b, int c, double d) {return d;}
25919 Without this code the stab for parameter 'd' will be set to
25920 an offset of 0 from the frame pointer, rather than 8. */
25922 /* The if() statement says:
25924 If the insn is a normal instruction
25925 and if the insn is setting the value in a register
25926 and if the register being set is the register holding the address of the argument
25927 and if the address is computing by an addition
25928 that involves adding to a register
25929 which is the frame pointer
25934 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
25936 if ( NONJUMP_INSN_P (insn
)
25937 && GET_CODE (PATTERN (insn
)) == SET
25938 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
25939 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
25940 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
25941 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25942 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
25945 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
25954 warning (0, "unable to compute real location of stacked parameter");
25955 value
= 8; /* XXX magic hack */
25961 /* Implement TARGET_PROMOTED_TYPE. */
25964 arm_promoted_type (const_tree t
)
25966 if (SCALAR_FLOAT_TYPE_P (t
)
25967 && TYPE_PRECISION (t
) == 16
25968 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
25969 return float_type_node
;
25973 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25974 This simply adds HFmode as a supported mode; even though we don't
25975 implement arithmetic on this type directly, it's supported by
25976 optabs conversions, much the way the double-word arithmetic is
25977 special-cased in the default hook. */
25980 arm_scalar_mode_supported_p (scalar_mode mode
)
25982 if (mode
== HFmode
)
25983 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
25984 else if (ALL_FIXED_POINT_MODE_P (mode
))
25987 return default_scalar_mode_supported_p (mode
);
25990 /* Set the value of FLT_EVAL_METHOD.
25991 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25993 0: evaluate all operations and constants, whose semantic type has at
25994 most the range and precision of type float, to the range and
25995 precision of float; evaluate all other operations and constants to
25996 the range and precision of the semantic type;
25998 N, where _FloatN is a supported interchange floating type
25999 evaluate all operations and constants, whose semantic type has at
26000 most the range and precision of _FloatN type, to the range and
26001 precision of the _FloatN type; evaluate all other operations and
26002 constants to the range and precision of the semantic type;
26004 If we have the ARMv8.2-A extensions then we support _Float16 in native
26005 precision, so we should set this to 16. Otherwise, we support the type,
26006 but want to evaluate expressions in float precision, so set this to
26009 static enum flt_eval_method
26010 arm_excess_precision (enum excess_precision_type type
)
26014 case EXCESS_PRECISION_TYPE_FAST
:
26015 case EXCESS_PRECISION_TYPE_STANDARD
:
26016 /* We can calculate either in 16-bit range and precision or
26017 32-bit range and precision. Make that decision based on whether
26018 we have native support for the ARMv8.2-A 16-bit floating-point
26019 instructions or not. */
26020 return (TARGET_VFP_FP16INST
26021 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26022 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
26023 case EXCESS_PRECISION_TYPE_IMPLICIT
:
26024 case EXCESS_PRECISION_TYPE_FLOAT16
:
26025 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
26027 gcc_unreachable ();
26029 return FLT_EVAL_METHOD_UNPREDICTABLE
;
26033 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26034 _Float16 if we are using anything other than ieee format for 16-bit
26035 floating point. Otherwise, punt to the default implementation. */
26036 static opt_scalar_float_mode
26037 arm_floatn_mode (int n
, bool extended
)
26039 if (!extended
&& n
== 16)
26041 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
26043 return opt_scalar_float_mode ();
26046 return default_floatn_mode (n
, extended
);
26050 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26051 not to early-clobber SRC registers in the process.
26053 We assume that the operands described by SRC and DEST represent a
26054 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26055 number of components into which the copy has been decomposed. */
26057 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
26061 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
26062 || REGNO (operands
[0]) < REGNO (operands
[1]))
26064 for (i
= 0; i
< count
; i
++)
26066 operands
[2 * i
] = dest
[i
];
26067 operands
[2 * i
+ 1] = src
[i
];
26072 for (i
= 0; i
< count
; i
++)
26074 operands
[2 * i
] = dest
[count
- i
- 1];
26075 operands
[2 * i
+ 1] = src
[count
- i
- 1];
26080 /* Split operands into moves from op[1] + op[2] into op[0]. */
26083 neon_split_vcombine (rtx operands
[3])
26085 unsigned int dest
= REGNO (operands
[0]);
26086 unsigned int src1
= REGNO (operands
[1]);
26087 unsigned int src2
= REGNO (operands
[2]);
26088 machine_mode halfmode
= GET_MODE (operands
[1]);
26089 unsigned int halfregs
= REG_NREGS (operands
[1]);
26090 rtx destlo
, desthi
;
26092 if (src1
== dest
&& src2
== dest
+ halfregs
)
26094 /* No-op move. Can't split to nothing; emit something. */
26095 emit_note (NOTE_INSN_DELETED
);
26099 /* Preserve register attributes for variable tracking. */
26100 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
26101 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
26102 GET_MODE_SIZE (halfmode
));
26104 /* Special case of reversed high/low parts. Use VSWP. */
26105 if (src2
== dest
&& src1
== dest
+ halfregs
)
26107 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
26108 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
26109 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
26113 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
26115 /* Try to avoid unnecessary moves if part of the result
26116 is in the right place already. */
26118 emit_move_insn (destlo
, operands
[1]);
26119 if (src2
!= dest
+ halfregs
)
26120 emit_move_insn (desthi
, operands
[2]);
26124 if (src2
!= dest
+ halfregs
)
26125 emit_move_insn (desthi
, operands
[2]);
26127 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
26140 /* Like emit_multi_reg_push, but allowing for a different set of
26141 registers to be described as saved. MASK is the set of registers
26142 to be saved; REAL_REGS is the set of registers to be described as
26143 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26146 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
26148 unsigned long regno
;
26149 rtx par
[10], tmp
, reg
;
26153 /* Build the parallel of the registers actually being stored. */
26154 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
26156 regno
= ctz_hwi (mask
);
26157 reg
= gen_rtx_REG (SImode
, regno
);
26160 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
26162 tmp
= gen_rtx_USE (VOIDmode
, reg
);
26167 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26168 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
26169 tmp
= gen_frame_mem (BLKmode
, tmp
);
26170 tmp
= gen_rtx_SET (tmp
, par
[0]);
26173 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
26174 insn
= emit_insn (tmp
);
26176 /* Always build the stack adjustment note for unwind info. */
26177 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
26178 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
26181 /* Build the parallel of the registers recorded as saved for unwind. */
26182 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
26184 regno
= ctz_hwi (real_regs
);
26185 reg
= gen_rtx_REG (SImode
, regno
);
26187 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
26188 tmp
= gen_frame_mem (SImode
, tmp
);
26189 tmp
= gen_rtx_SET (tmp
, reg
);
26190 RTX_FRAME_RELATED_P (tmp
) = 1;
26198 RTX_FRAME_RELATED_P (par
[0]) = 1;
26199 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
26202 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
26207 /* Emit code to push or pop registers to or from the stack. F is the
26208 assembly file. MASK is the registers to pop. */
26210 thumb_pop (FILE *f
, unsigned long mask
)
26213 int lo_mask
= mask
& 0xFF;
26217 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26219 /* Special case. Do not generate a POP PC statement here, do it in
26221 thumb_exit (f
, -1);
26225 fprintf (f
, "\tpop\t{");
26227 /* Look at the low registers first. */
26228 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26232 asm_fprintf (f
, "%r", regno
);
26234 if ((lo_mask
& ~1) != 0)
26239 if (mask
& (1 << PC_REGNUM
))
26241 /* Catch popping the PC. */
26242 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
26243 || IS_CMSE_ENTRY (arm_current_func_type ()))
26245 /* The PC is never poped directly, instead
26246 it is popped into r3 and then BX is used. */
26247 fprintf (f
, "}\n");
26249 thumb_exit (f
, -1);
26258 asm_fprintf (f
, "%r", PC_REGNUM
);
26262 fprintf (f
, "}\n");
26265 /* Generate code to return from a thumb function.
26266 If 'reg_containing_return_addr' is -1, then the return address is
26267 actually on the stack, at the stack pointer.
26269 Note: do not forget to update length attribute of corresponding insn pattern
26270 when changing assembly output (eg. length attribute of epilogue_insns when
26271 updating Armv8-M Baseline Security Extensions register clearing
26274 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26276 unsigned regs_available_for_popping
;
26277 unsigned regs_to_pop
;
26279 unsigned available
;
26283 int restore_a4
= FALSE
;
26285 /* Compute the registers we need to pop. */
26289 if (reg_containing_return_addr
== -1)
26291 regs_to_pop
|= 1 << LR_REGNUM
;
26295 if (TARGET_BACKTRACE
)
26297 /* Restore the (ARM) frame pointer and stack pointer. */
26298 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26302 /* If there is nothing to pop then just emit the BX instruction and
26304 if (pops_needed
== 0)
26306 if (crtl
->calls_eh_return
)
26307 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26309 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26311 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26312 emitted by cmse_nonsecure_entry_clear_before_return (). */
26313 if (!TARGET_HAVE_FPCXT_CMSE
)
26314 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
26315 reg_containing_return_addr
);
26316 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26319 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26322 /* Otherwise if we are not supporting interworking and we have not created
26323 a backtrace structure and the function was not entered in ARM mode then
26324 just pop the return address straight into the PC. */
26325 else if (!TARGET_INTERWORK
26326 && !TARGET_BACKTRACE
26327 && !is_called_in_ARM_mode (current_function_decl
)
26328 && !crtl
->calls_eh_return
26329 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26331 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26335 /* Find out how many of the (return) argument registers we can corrupt. */
26336 regs_available_for_popping
= 0;
26338 /* If returning via __builtin_eh_return, the bottom three registers
26339 all contain information needed for the return. */
26340 if (crtl
->calls_eh_return
)
26344 /* If we can deduce the registers used from the function's
26345 return value. This is more reliable that examining
26346 df_regs_ever_live_p () because that will be set if the register is
26347 ever used in the function, not just if the register is used
26348 to hold a return value. */
26350 if (crtl
->return_rtx
!= 0)
26351 mode
= GET_MODE (crtl
->return_rtx
);
26353 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26355 size
= GET_MODE_SIZE (mode
);
26359 /* In a void function we can use any argument register.
26360 In a function that returns a structure on the stack
26361 we can use the second and third argument registers. */
26362 if (mode
== VOIDmode
)
26363 regs_available_for_popping
=
26364 (1 << ARG_REGISTER (1))
26365 | (1 << ARG_REGISTER (2))
26366 | (1 << ARG_REGISTER (3));
26368 regs_available_for_popping
=
26369 (1 << ARG_REGISTER (2))
26370 | (1 << ARG_REGISTER (3));
26372 else if (size
<= 4)
26373 regs_available_for_popping
=
26374 (1 << ARG_REGISTER (2))
26375 | (1 << ARG_REGISTER (3));
26376 else if (size
<= 8)
26377 regs_available_for_popping
=
26378 (1 << ARG_REGISTER (3));
26381 /* Match registers to be popped with registers into which we pop them. */
26382 for (available
= regs_available_for_popping
,
26383 required
= regs_to_pop
;
26384 required
!= 0 && available
!= 0;
26385 available
&= ~(available
& - available
),
26386 required
&= ~(required
& - required
))
26389 /* If we have any popping registers left over, remove them. */
26391 regs_available_for_popping
&= ~available
;
26393 /* Otherwise if we need another popping register we can use
26394 the fourth argument register. */
26395 else if (pops_needed
)
26397 /* If we have not found any free argument registers and
26398 reg a4 contains the return address, we must move it. */
26399 if (regs_available_for_popping
== 0
26400 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26402 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26403 reg_containing_return_addr
= LR_REGNUM
;
26405 else if (size
> 12)
26407 /* Register a4 is being used to hold part of the return value,
26408 but we have dire need of a free, low register. */
26411 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26414 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26416 /* The fourth argument register is available. */
26417 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26423 /* Pop as many registers as we can. */
26424 thumb_pop (f
, regs_available_for_popping
);
26426 /* Process the registers we popped. */
26427 if (reg_containing_return_addr
== -1)
26429 /* The return address was popped into the lowest numbered register. */
26430 regs_to_pop
&= ~(1 << LR_REGNUM
);
26432 reg_containing_return_addr
=
26433 number_of_first_bit_set (regs_available_for_popping
);
26435 /* Remove this register for the mask of available registers, so that
26436 the return address will not be corrupted by further pops. */
26437 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26440 /* If we popped other registers then handle them here. */
26441 if (regs_available_for_popping
)
26445 /* Work out which register currently contains the frame pointer. */
26446 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26448 /* Move it into the correct place. */
26449 asm_fprintf (f
, "\tmov\t%r, %r\n",
26450 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26452 /* (Temporarily) remove it from the mask of popped registers. */
26453 regs_available_for_popping
&= ~(1 << frame_pointer
);
26454 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26456 if (regs_available_for_popping
)
26460 /* We popped the stack pointer as well,
26461 find the register that contains it. */
26462 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26464 /* Move it into the stack register. */
26465 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26467 /* At this point we have popped all necessary registers, so
26468 do not worry about restoring regs_available_for_popping
26469 to its correct value:
26471 assert (pops_needed == 0)
26472 assert (regs_available_for_popping == (1 << frame_pointer))
26473 assert (regs_to_pop == (1 << STACK_POINTER)) */
26477 /* Since we have just move the popped value into the frame
26478 pointer, the popping register is available for reuse, and
26479 we know that we still have the stack pointer left to pop. */
26480 regs_available_for_popping
|= (1 << frame_pointer
);
26484 /* If we still have registers left on the stack, but we no longer have
26485 any registers into which we can pop them, then we must move the return
26486 address into the link register and make available the register that
26488 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26490 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26492 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26493 reg_containing_return_addr
);
26495 reg_containing_return_addr
= LR_REGNUM
;
26498 /* If we have registers left on the stack then pop some more.
26499 We know that at most we will want to pop FP and SP. */
26500 if (pops_needed
> 0)
26505 thumb_pop (f
, regs_available_for_popping
);
26507 /* We have popped either FP or SP.
26508 Move whichever one it is into the correct register. */
26509 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26510 move_to
= number_of_first_bit_set (regs_to_pop
);
26512 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26516 /* If we still have not popped everything then we must have only
26517 had one register available to us and we are now popping the SP. */
26518 if (pops_needed
> 0)
26522 thumb_pop (f
, regs_available_for_popping
);
26524 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26526 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26528 assert (regs_to_pop == (1 << STACK_POINTER))
26529 assert (pops_needed == 1)
26533 /* If necessary restore the a4 register. */
26536 if (reg_containing_return_addr
!= LR_REGNUM
)
26538 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26539 reg_containing_return_addr
= LR_REGNUM
;
26542 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26545 if (crtl
->calls_eh_return
)
26546 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26548 /* Return to caller. */
26549 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26551 /* This is for the cases where LR is not being used to contain the return
26552 address. It may therefore contain information that we might not want
26553 to leak, hence it must be cleared. The value in R0 will never be a
26554 secret at this point, so it is safe to use it, see the clearing code
26555 in cmse_nonsecure_entry_clear_before_return (). */
26556 if (reg_containing_return_addr
!= LR_REGNUM
)
26557 asm_fprintf (f
, "\tmov\tlr, r0\n");
26559 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26560 by cmse_nonsecure_entry_clear_before_return (). */
26561 if (!TARGET_HAVE_FPCXT_CMSE
)
26562 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
26563 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26566 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26569 /* Scan INSN just before assembler is output for it.
26570 For Thumb-1, we track the status of the condition codes; this
26571 information is used in the cbranchsi4_insn pattern. */
26573 thumb1_final_prescan_insn (rtx_insn
*insn
)
26575 if (flag_print_asm_name
)
26576 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26577 INSN_ADDRESSES (INSN_UID (insn
)));
26578 /* Don't overwrite the previous setter when we get to a cbranch. */
26579 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26581 enum attr_conds conds
;
26583 if (cfun
->machine
->thumb1_cc_insn
)
26585 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26586 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26589 conds
= get_attr_conds (insn
);
26590 if (conds
== CONDS_SET
)
26592 rtx set
= single_set (insn
);
26593 cfun
->machine
->thumb1_cc_insn
= insn
;
26594 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26595 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26596 cfun
->machine
->thumb1_cc_mode
= CC_NZmode
;
26597 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26599 rtx src1
= XEXP (SET_SRC (set
), 1);
26600 if (src1
== const0_rtx
)
26601 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26603 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26605 /* Record the src register operand instead of dest because
26606 cprop_hardreg pass propagates src. */
26607 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26610 else if (conds
!= CONDS_NOCOND
)
26611 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26614 /* Check if unexpected far jump is used. */
26615 if (cfun
->machine
->lr_save_eliminated
26616 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26617 internal_error("Unexpected thumb1 far jump");
26621 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26623 unsigned HOST_WIDE_INT mask
= 0xff;
26626 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26627 if (val
== 0) /* XXX */
26630 for (i
= 0; i
< 25; i
++)
26631 if ((val
& (mask
<< i
)) == val
)
26637 /* Returns nonzero if the current function contains,
26638 or might contain a far jump. */
26640 thumb_far_jump_used_p (void)
26643 bool far_jump
= false;
26644 unsigned int func_size
= 0;
26646 /* If we have already decided that far jumps may be used,
26647 do not bother checking again, and always return true even if
26648 it turns out that they are not being used. Once we have made
26649 the decision that far jumps are present (and that hence the link
26650 register will be pushed onto the stack) we cannot go back on it. */
26651 if (cfun
->machine
->far_jump_used
)
26654 /* If this function is not being called from the prologue/epilogue
26655 generation code then it must be being called from the
26656 INITIAL_ELIMINATION_OFFSET macro. */
26657 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26659 /* In this case we know that we are being asked about the elimination
26660 of the arg pointer register. If that register is not being used,
26661 then there are no arguments on the stack, and we do not have to
26662 worry that a far jump might force the prologue to push the link
26663 register, changing the stack offsets. In this case we can just
26664 return false, since the presence of far jumps in the function will
26665 not affect stack offsets.
26667 If the arg pointer is live (or if it was live, but has now been
26668 eliminated and so set to dead) then we do have to test to see if
26669 the function might contain a far jump. This test can lead to some
26670 false negatives, since before reload is completed, then length of
26671 branch instructions is not known, so gcc defaults to returning their
26672 longest length, which in turn sets the far jump attribute to true.
26674 A false negative will not result in bad code being generated, but it
26675 will result in a needless push and pop of the link register. We
26676 hope that this does not occur too often.
26678 If we need doubleword stack alignment this could affect the other
26679 elimination offsets so we can't risk getting it wrong. */
26680 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26681 cfun
->machine
->arg_pointer_live
= 1;
26682 else if (!cfun
->machine
->arg_pointer_live
)
26686 /* We should not change far_jump_used during or after reload, as there is
26687 no chance to change stack frame layout. */
26688 if (reload_in_progress
|| reload_completed
)
26691 /* Check to see if the function contains a branch
26692 insn with the far jump attribute set. */
26693 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26695 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26699 func_size
+= get_attr_length (insn
);
26702 /* Attribute far_jump will always be true for thumb1 before
26703 shorten_branch pass. So checking far_jump attribute before
26704 shorten_branch isn't much useful.
26706 Following heuristic tries to estimate more accurately if a far jump
26707 may finally be used. The heuristic is very conservative as there is
26708 no chance to roll-back the decision of not to use far jump.
26710 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26711 2-byte insn is associated with a 4 byte constant pool. Using
26712 function size 2048/3 as the threshold is conservative enough. */
26715 if ((func_size
* 3) >= 2048)
26717 /* Record the fact that we have decided that
26718 the function does use far jumps. */
26719 cfun
->machine
->far_jump_used
= 1;
26727 /* Return nonzero if FUNC must be entered in ARM mode. */
26729 is_called_in_ARM_mode (tree func
)
26731 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26733 /* Ignore the problem about functions whose address is taken. */
26734 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26738 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26744 /* Given the stack offsets and register mask in OFFSETS, decide how
26745 many additional registers to push instead of subtracting a constant
26746 from SP. For epilogues the principle is the same except we use pop.
26747 FOR_PROLOGUE indicates which we're generating. */
26749 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26751 HOST_WIDE_INT amount
;
26752 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26753 /* Extract a mask of the ones we can give to the Thumb's push/pop
26755 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26756 /* Then count how many other high registers will need to be pushed. */
26757 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26758 int n_free
, reg_base
, size
;
26760 if (!for_prologue
&& frame_pointer_needed
)
26761 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26763 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26765 /* If the stack frame size is 512 exactly, we can save one load
26766 instruction, which should make this a win even when optimizing
26768 if (!optimize_size
&& amount
!= 512)
26771 /* Can't do this if there are high registers to push. */
26772 if (high_regs_pushed
!= 0)
26775 /* Shouldn't do it in the prologue if no registers would normally
26776 be pushed at all. In the epilogue, also allow it if we'll have
26777 a pop insn for the PC. */
26780 || TARGET_BACKTRACE
26781 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26782 || TARGET_INTERWORK
26783 || crtl
->args
.pretend_args_size
!= 0))
26786 /* Don't do this if thumb_expand_prologue wants to emit instructions
26787 between the push and the stack frame allocation. */
26789 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26790 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26797 size
= arm_size_return_regs ();
26798 reg_base
= ARM_NUM_INTS (size
);
26799 live_regs_mask
>>= reg_base
;
26802 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26803 && (for_prologue
|| call_used_or_fixed_reg_p (reg_base
+ n_free
)))
26805 live_regs_mask
>>= 1;
26811 gcc_assert (amount
/ 4 * 4 == amount
);
26813 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26814 return (amount
- 508) / 4;
26815 if (amount
<= n_free
* 4)
26820 /* The bits which aren't usefully expanded as rtl. */
26822 thumb1_unexpanded_epilogue (void)
26824 arm_stack_offsets
*offsets
;
26826 unsigned long live_regs_mask
= 0;
26827 int high_regs_pushed
= 0;
26829 int had_to_push_lr
;
26832 if (cfun
->machine
->return_used_this_function
!= 0)
26835 if (IS_NAKED (arm_current_func_type ()))
26838 offsets
= arm_get_frame_offsets ();
26839 live_regs_mask
= offsets
->saved_regs_mask
;
26840 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26842 /* If we can deduce the registers used from the function's return value.
26843 This is more reliable that examining df_regs_ever_live_p () because that
26844 will be set if the register is ever used in the function, not just if
26845 the register is used to hold a return value. */
26846 size
= arm_size_return_regs ();
26848 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26851 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26852 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26855 /* The prolog may have pushed some high registers to use as
26856 work registers. e.g. the testsuite file:
26857 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26858 compiles to produce:
26859 push {r4, r5, r6, r7, lr}
26863 as part of the prolog. We have to undo that pushing here. */
26865 if (high_regs_pushed
)
26867 unsigned long mask
= live_regs_mask
& 0xff;
26870 mask
|= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26873 /* Oh dear! We have no low registers into which we can pop
26876 ("no low registers available for popping high registers");
26878 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26879 if (live_regs_mask
& (1 << next_hi_reg
))
26882 while (high_regs_pushed
)
26884 /* Find lo register(s) into which the high register(s) can
26886 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26888 if (mask
& (1 << regno
))
26889 high_regs_pushed
--;
26890 if (high_regs_pushed
== 0)
26894 if (high_regs_pushed
== 0 && regno
>= 0)
26895 mask
&= ~((1 << regno
) - 1);
26897 /* Pop the values into the low register(s). */
26898 thumb_pop (asm_out_file
, mask
);
26900 /* Move the value(s) into the high registers. */
26901 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26903 if (mask
& (1 << regno
))
26905 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26908 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26910 if (live_regs_mask
& (1 << next_hi_reg
))
26915 live_regs_mask
&= ~0x0f00;
26918 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26919 live_regs_mask
&= 0xff;
26921 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26923 /* Pop the return address into the PC. */
26924 if (had_to_push_lr
)
26925 live_regs_mask
|= 1 << PC_REGNUM
;
26927 /* Either no argument registers were pushed or a backtrace
26928 structure was created which includes an adjusted stack
26929 pointer, so just pop everything. */
26930 if (live_regs_mask
)
26931 thumb_pop (asm_out_file
, live_regs_mask
);
26933 /* We have either just popped the return address into the
26934 PC or it is was kept in LR for the entire function.
26935 Note that thumb_pop has already called thumb_exit if the
26936 PC was in the list. */
26937 if (!had_to_push_lr
)
26938 thumb_exit (asm_out_file
, LR_REGNUM
);
26942 /* Pop everything but the return address. */
26943 if (live_regs_mask
)
26944 thumb_pop (asm_out_file
, live_regs_mask
);
26946 if (had_to_push_lr
)
26950 /* We have no free low regs, so save one. */
26951 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26955 /* Get the return address into a temporary register. */
26956 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26960 /* Move the return address to lr. */
26961 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26963 /* Restore the low register. */
26964 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26969 regno
= LAST_ARG_REGNUM
;
26974 /* Remove the argument registers that were pushed onto the stack. */
26975 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26976 SP_REGNUM
, SP_REGNUM
,
26977 crtl
->args
.pretend_args_size
);
26979 thumb_exit (asm_out_file
, regno
);
26985 /* Functions to save and restore machine-specific function data. */
26986 static struct machine_function
*
26987 arm_init_machine_status (void)
26989 struct machine_function
*machine
;
26990 machine
= ggc_cleared_alloc
<machine_function
> ();
26992 #if ARM_FT_UNKNOWN != 0
26993 machine
->func_type
= ARM_FT_UNKNOWN
;
26995 machine
->static_chain_stack_bytes
= -1;
26996 machine
->pacspval_needed
= 0;
27000 /* Return an RTX indicating where the return address to the
27001 calling function can be found. */
27003 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
27008 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
27011 /* Do anything needed before RTL is emitted for each function. */
27013 arm_init_expanders (void)
27015 /* Arrange to initialize and mark the machine per-function status. */
27016 init_machine_status
= arm_init_machine_status
;
27018 /* This is to stop the combine pass optimizing away the alignment
27019 adjustment of va_arg. */
27020 /* ??? It is claimed that this should not be necessary. */
27022 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
27025 /* Check that FUNC is called with a different mode. */
27028 arm_change_mode_p (tree func
)
27030 if (TREE_CODE (func
) != FUNCTION_DECL
)
27033 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
27036 callee_tree
= target_option_default_node
;
27038 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
27039 int flags
= callee_opts
->x_target_flags
;
27041 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
27044 /* Like arm_compute_initial_elimination offset. Simpler because there
27045 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27046 to point at the base of the local variables after static stack
27047 space for a function has been allocated. */
27050 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
27052 arm_stack_offsets
*offsets
;
27054 offsets
= arm_get_frame_offsets ();
27058 case ARG_POINTER_REGNUM
:
27061 case STACK_POINTER_REGNUM
:
27062 return offsets
->outgoing_args
- offsets
->saved_args
;
27064 case FRAME_POINTER_REGNUM
:
27065 return offsets
->soft_frame
- offsets
->saved_args
;
27067 case ARM_HARD_FRAME_POINTER_REGNUM
:
27068 return offsets
->saved_regs
- offsets
->saved_args
;
27070 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27071 return offsets
->locals_base
- offsets
->saved_args
;
27074 gcc_unreachable ();
27078 case FRAME_POINTER_REGNUM
:
27081 case STACK_POINTER_REGNUM
:
27082 return offsets
->outgoing_args
- offsets
->soft_frame
;
27084 case ARM_HARD_FRAME_POINTER_REGNUM
:
27085 return offsets
->saved_regs
- offsets
->soft_frame
;
27087 case THUMB_HARD_FRAME_POINTER_REGNUM
:
27088 return offsets
->locals_base
- offsets
->soft_frame
;
27091 gcc_unreachable ();
27096 gcc_unreachable ();
27100 /* Generate the function's prologue. */
27103 thumb1_expand_prologue (void)
27107 HOST_WIDE_INT amount
;
27108 HOST_WIDE_INT size
;
27109 arm_stack_offsets
*offsets
;
27110 unsigned long func_type
;
27112 unsigned long live_regs_mask
;
27113 unsigned long l_mask
;
27114 unsigned high_regs_pushed
= 0;
27115 bool lr_needs_saving
;
27117 func_type
= arm_current_func_type ();
27119 /* Naked functions don't have prologues. */
27120 if (IS_NAKED (func_type
))
27122 if (flag_stack_usage_info
)
27123 current_function_static_stack_size
= 0;
27127 if (IS_INTERRUPT (func_type
))
27129 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27133 if (is_called_in_ARM_mode (current_function_decl
))
27134 emit_insn (gen_prologue_thumb1_interwork ());
27136 offsets
= arm_get_frame_offsets ();
27137 live_regs_mask
= offsets
->saved_regs_mask
;
27138 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
27140 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27141 l_mask
= live_regs_mask
& 0x40ff;
27142 /* Then count how many other high registers will need to be pushed. */
27143 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
27145 if (crtl
->args
.pretend_args_size
)
27147 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
27149 if (cfun
->machine
->uses_anonymous_args
)
27151 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
27152 unsigned long mask
;
27154 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
27155 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
27157 insn
= thumb1_emit_multi_reg_push (mask
, 0);
27161 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27162 stack_pointer_rtx
, x
));
27164 RTX_FRAME_RELATED_P (insn
) = 1;
27167 if (TARGET_BACKTRACE
)
27169 HOST_WIDE_INT offset
= 0;
27170 unsigned work_register
;
27171 rtx work_reg
, x
, arm_hfp_rtx
;
27173 /* We have been asked to create a stack backtrace structure.
27174 The code looks like this:
27178 0 sub SP, #16 Reserve space for 4 registers.
27179 2 push {R7} Push low registers.
27180 4 add R7, SP, #20 Get the stack pointer before the push.
27181 6 str R7, [SP, #8] Store the stack pointer
27182 (before reserving the space).
27183 8 mov R7, PC Get hold of the start of this code + 12.
27184 10 str R7, [SP, #16] Store it.
27185 12 mov R7, FP Get hold of the current frame pointer.
27186 14 str R7, [SP, #4] Store it.
27187 16 mov R7, LR Get hold of the current return address.
27188 18 str R7, [SP, #12] Store it.
27189 20 add R7, SP, #16 Point at the start of the
27190 backtrace structure.
27191 22 mov FP, R7 Put this value into the frame pointer. */
27193 work_register
= thumb_find_work_register (live_regs_mask
);
27194 work_reg
= gen_rtx_REG (SImode
, work_register
);
27195 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27197 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27198 stack_pointer_rtx
, GEN_INT (-16)));
27199 RTX_FRAME_RELATED_P (insn
) = 1;
27203 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27204 RTX_FRAME_RELATED_P (insn
) = 1;
27205 lr_needs_saving
= false;
27207 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27210 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27211 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27213 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27214 x
= gen_frame_mem (SImode
, x
);
27215 emit_move_insn (x
, work_reg
);
27217 /* Make sure that the instruction fetching the PC is in the right place
27218 to calculate "start of backtrace creation code + 12". */
27219 /* ??? The stores using the common WORK_REG ought to be enough to
27220 prevent the scheduler from doing anything weird. Failing that
27221 we could always move all of the following into an UNSPEC_VOLATILE. */
27224 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27225 emit_move_insn (work_reg
, x
);
27227 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27228 x
= gen_frame_mem (SImode
, x
);
27229 emit_move_insn (x
, work_reg
);
27231 emit_move_insn (work_reg
, arm_hfp_rtx
);
27233 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27234 x
= gen_frame_mem (SImode
, x
);
27235 emit_move_insn (x
, work_reg
);
27239 emit_move_insn (work_reg
, arm_hfp_rtx
);
27241 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27242 x
= gen_frame_mem (SImode
, x
);
27243 emit_move_insn (x
, work_reg
);
27245 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27246 emit_move_insn (work_reg
, x
);
27248 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27249 x
= gen_frame_mem (SImode
, x
);
27250 emit_move_insn (x
, work_reg
);
27253 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27254 emit_move_insn (work_reg
, x
);
27256 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27257 x
= gen_frame_mem (SImode
, x
);
27258 emit_move_insn (x
, work_reg
);
27260 x
= GEN_INT (offset
+ 12);
27261 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27263 emit_move_insn (arm_hfp_rtx
, work_reg
);
27265 /* Optimization: If we are not pushing any low registers but we are going
27266 to push some high registers then delay our first push. This will just
27267 be a push of LR and we can combine it with the push of the first high
27269 else if ((l_mask
& 0xff) != 0
27270 || (high_regs_pushed
== 0 && lr_needs_saving
))
27272 unsigned long mask
= l_mask
;
27273 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27274 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27275 RTX_FRAME_RELATED_P (insn
) = 1;
27276 lr_needs_saving
= false;
27279 if (high_regs_pushed
)
27281 unsigned pushable_regs
;
27282 unsigned next_hi_reg
;
27283 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27284 : crtl
->args
.info
.nregs
;
27285 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27287 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27288 if (live_regs_mask
& (1 << next_hi_reg
))
27291 /* Here we need to mask out registers used for passing arguments
27292 even if they can be pushed. This is to avoid using them to
27293 stash the high registers. Such kind of stash may clobber the
27294 use of arguments. */
27295 pushable_regs
= l_mask
& (~arg_regs_mask
);
27296 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
27298 /* Normally, LR can be used as a scratch register once it has been
27299 saved; but if the function examines its own return address then
27300 the value is still live and we need to avoid using it. */
27301 bool return_addr_live
27302 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
27305 if (lr_needs_saving
|| return_addr_live
)
27306 pushable_regs
&= ~(1 << LR_REGNUM
);
27308 if (pushable_regs
== 0)
27309 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27311 while (high_regs_pushed
> 0)
27313 unsigned long real_regs_mask
= 0;
27314 unsigned long push_mask
= 0;
27316 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
27318 if (pushable_regs
& (1 << regno
))
27320 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27321 gen_rtx_REG (SImode
, next_hi_reg
));
27323 high_regs_pushed
--;
27324 real_regs_mask
|= (1 << next_hi_reg
);
27325 push_mask
|= (1 << regno
);
27327 if (high_regs_pushed
)
27329 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27331 if (live_regs_mask
& (1 << next_hi_reg
))
27339 /* If we had to find a work register and we have not yet
27340 saved the LR then add it to the list of regs to push. */
27341 if (lr_needs_saving
)
27343 push_mask
|= 1 << LR_REGNUM
;
27344 real_regs_mask
|= 1 << LR_REGNUM
;
27345 lr_needs_saving
= false;
27346 /* If the return address is not live at this point, we
27347 can add LR to the list of registers that we can use
27349 if (!return_addr_live
)
27350 pushable_regs
|= 1 << LR_REGNUM
;
27353 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
27354 RTX_FRAME_RELATED_P (insn
) = 1;
27358 /* Load the pic register before setting the frame pointer,
27359 so we can use r7 as a temporary work register. */
27360 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27361 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
27363 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27364 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27365 stack_pointer_rtx
);
27367 size
= offsets
->outgoing_args
- offsets
->saved_args
;
27368 if (flag_stack_usage_info
)
27369 current_function_static_stack_size
= size
;
27371 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27372 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27373 || flag_stack_clash_protection
)
27375 sorry ("%<-fstack-check=specific%> for Thumb-1");
27377 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27378 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27383 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27384 GEN_INT (- amount
)));
27385 RTX_FRAME_RELATED_P (insn
) = 1;
27391 /* The stack decrement is too big for an immediate value in a single
27392 insn. In theory we could issue multiple subtracts, but after
27393 three of them it becomes more space efficient to place the full
27394 value in the constant pool and load into a register. (Also the
27395 ARM debugger really likes to see only one stack decrement per
27396 function). So instead we look for a scratch register into which
27397 we can load the decrement, and then we subtract this from the
27398 stack pointer. Unfortunately on the thumb the only available
27399 scratch registers are the argument registers, and we cannot use
27400 these as they may hold arguments to the function. Instead we
27401 attempt to locate a call preserved register which is used by this
27402 function. If we can find one, then we know that it will have
27403 been pushed at the start of the prologue and so we can corrupt
27405 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27406 if (live_regs_mask
& (1 << regno
))
27409 gcc_assert(regno
<= LAST_LO_REGNUM
);
27411 reg
= gen_rtx_REG (SImode
, regno
);
27413 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27415 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27416 stack_pointer_rtx
, reg
));
27418 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27419 plus_constant (Pmode
, stack_pointer_rtx
,
27421 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27422 RTX_FRAME_RELATED_P (insn
) = 1;
27426 if (frame_pointer_needed
)
27427 thumb_set_frame_pointer (offsets
);
27429 /* If we are profiling, make sure no instructions are scheduled before
27430 the call to mcount. Similarly if the user has requested no
27431 scheduling in the prolog. Similarly if we want non-call exceptions
27432 using the EABI unwinder, to prevent faulting instructions from being
27433 swapped with a stack adjustment. */
27434 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27435 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27436 && cfun
->can_throw_non_call_exceptions
))
27437 emit_insn (gen_blockage ());
27439 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27440 if (live_regs_mask
& 0xff)
27441 cfun
->machine
->lr_save_eliminated
= 0;
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; see upstream gcc/config/arm/arm.cc
   for the authoritative text.

   cmse_nonsecure_entry_clear_before_return: before returning from an
   Armv8-M Security Extensions (CMSE) nonsecure-entry function, build a
   bitmap of caller-saved core and (when hard-float/FPCXT is in use) VFP
   registers that could leak secure state, remove from it the registers
   that carry the return value (via compute_not_to_clear_mask) and the
   scratch regs needed by output_return_instruction, then emit the
   clearing sequence through cmse_clear_registers.  Padding bits of a
   composite return value in r0 are cleared separately via
   padding_bits_to_clear.  */
27444 /* Clear caller saved registers not used to pass return values and leaked
27445 condition flags before exiting a cmse_nonsecure_entry function. */
27448 cmse_nonsecure_entry_clear_before_return (void)
27450 bool clear_vfpregs
= TARGET_HARD_FLOAT
|| TARGET_HAVE_FPCXT_CMSE
;
27451 int regno
, maxregno
= clear_vfpregs
? LAST_VFP_REGNUM
: IP_REGNUM
;
27452 uint32_t padding_bits_to_clear
= 0;
27453 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
27454 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
27457 bitmap_clear (to_clear_bitmap
);
27458 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
27459 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
27461 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27465 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
27467 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
27469 if (!TARGET_HAVE_FPCXT_CMSE
)
27471 /* Make sure we don't clear the two scratch registers used to clear
27472 the relevant FPSCR bits in output_return_instruction. */
27473 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
27474 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
27475 emit_use (gen_rtx_REG (SImode
, 4));
27476 bitmap_clear_bit (to_clear_bitmap
, 4);
27480 /* If the user has defined registers to be caller saved, these are no longer
27481 restored by the function before returning and must thus be cleared for
27482 security purposes. */
27483 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
27485 /* We do not touch registers that can be used to pass arguments as per
27486 the AAPCS, since these should never be made callee-saved by user
27488 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
27490 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
27492 if (!callee_saved_reg_p (regno
)
27493 && (!IN_RANGE (regno
, FIRST_VFP_REGNUM
, LAST_VFP_REGNUM
)
27494 || TARGET_HARD_FLOAT
))
27495 bitmap_set_bit (to_clear_bitmap
, regno
);
27498 /* Make sure we do not clear the registers used to return the result in. */
27499 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
27500 if (!VOID_TYPE_P (result_type
))
27502 uint64_t to_clear_return_mask
;
27503 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
27505 /* No need to check that we return in registers, because we don't
27506 support returning on stack yet. */
27507 gcc_assert (REG_P (result_rtl
));
27508 to_clear_return_mask
27509 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
27510 &padding_bits_to_clear
);
27511 if (to_clear_return_mask
)
27513 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
27514 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
27516 if (to_clear_return_mask
& (1ULL << regno
))
27517 bitmap_clear_bit (to_clear_bitmap
, regno
);
27522 if (padding_bits_to_clear
!= 0)
27524 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
27525 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
27527 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27528 returning a composite type, which only uses r0. Let's make sure that
27529 r1-r3 is cleared too. */
27530 bitmap_clear (to_clear_arg_regs_bitmap
);
27531 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
27532 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
27535 /* Clear full registers that leak before returning. */
27536 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
27537 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
27538 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb2_expand_return: expand a Thumb-2 function return.  Counts the
   saved registers; when a real (non-simple) return with saved registers
   is needed it either (a) pops with LR restored then authenticates
   (aut_nop) and simple-returns when PAC is enabled, (b) emits a single
   post-increment PC load for the one-register case, or (c) replaces LR
   with PC in the pop mask and emits one multi-register pop.  For the
   simple-return path, CMSE entry functions first get their leaking
   registers cleared.  */
27542 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27543 POP instruction can be generated. LR should be replaced by PC. All
27544 the checks required are already done by USE_RETURN_INSN (). Hence,
27545 all we really need to check here is if single register is to be
27546 returned, or multiple register return. */
27548 thumb2_expand_return (bool simple_return
)
27551 unsigned long saved_regs_mask
;
27552 arm_stack_offsets
*offsets
;
27554 offsets
= arm_get_frame_offsets ();
27555 saved_regs_mask
= offsets
->saved_regs_mask
;
27557 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27558 if (saved_regs_mask
& (1 << i
))
27561 if (!simple_return
&& saved_regs_mask
)
27563 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27564 functions or adapt code to handle according to ACLE. This path should
27565 not be reachable for cmse_nonsecure_entry functions though we prefer
27566 to assert it for now to ensure that future code changes do not silently
27567 change this behavior. */
27568 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27569 if (arm_current_function_pac_enabled_p ())
27571 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
27572 arm_emit_multi_reg_pop (saved_regs_mask
);
27573 emit_insn (gen_aut_nop ());
27574 emit_jump_insn (simple_return_rtx
);
27576 else if (num_regs
== 1)
27578 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27579 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27580 rtx addr
= gen_rtx_MEM (SImode
,
27581 gen_rtx_POST_INC (SImode
,
27582 stack_pointer_rtx
));
27583 set_mem_alias_set (addr
, get_frame_alias_set ());
27584 XVECEXP (par
, 0, 0) = ret_rtx
;
27585 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
27586 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27587 emit_jump_insn (par
);
27591 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27592 saved_regs_mask
|= (1 << PC_REGNUM
);
27593 arm_emit_multi_reg_pop (saved_regs_mask
);
27598 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27599 cmse_nonsecure_entry_clear_before_return ();
27600 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb1_expand_epilogue: emits the RTL epilogue for a Thumb-1 function.
   Skips naked functions; restores SP from the frame pointer when one was
   used; releases the local frame (large adjustments go through r3, which
   the comment below notes is always free here); emits a USE of SP so the
   adjustment is not deleted; emits clobbers/uses so dataflow sees the
   registers the pop will restore; and, for CMSE entry functions, clears
   caller-saved registers that are not used for the return value.  */
27605 thumb1_expand_epilogue (void)
27607 HOST_WIDE_INT amount
;
27608 arm_stack_offsets
*offsets
;
27611 /* Naked functions don't have prologues. */
27612 if (IS_NAKED (arm_current_func_type ()))
27615 offsets
= arm_get_frame_offsets ();
27616 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27618 if (frame_pointer_needed
)
27620 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27621 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27623 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27625 gcc_assert (amount
>= 0);
27628 emit_insn (gen_blockage ());
27631 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27632 GEN_INT (amount
)));
27635 /* r3 is always free in the epilogue. */
27636 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27638 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27639 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27643 /* Emit a USE (stack_pointer_rtx), so that
27644 the stack adjustment will not be deleted. */
27645 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27647 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27648 emit_insn (gen_blockage ());
27650 /* Emit a clobber for each insn that will be restored in the epilogue,
27651 so that flow2 will get register lifetimes correct. */
27652 for (regno
= 0; regno
< 13; regno
++)
27653 if (reg_needs_saving_p (regno
))
27654 emit_clobber (gen_rtx_REG (SImode
, regno
));
27656 if (! df_regs_ever_live_p (LR_REGNUM
))
27657 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27659 /* Clear all caller-saved regs that are not used to return. */
27660 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27661 cmse_nonsecure_entry_clear_before_return ();
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines -- e.g. the really_return condition head before fused
   line 27771 is missing) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   arm_expand_epilogue_apcs_frame: epilogue expansion for the ARM-mode
   APCS frame layout (asserts TARGET_APCS_FRAME && frame_pointer_needed
   && TARGET_ARM).  Walks the saved-register mask to locate the VFP save
   area relative to the hard frame pointer, pops VFP registers in the
   same groupings the prologue used (vldm needs consecutive regs),
   restores iWMMXt registers with REG_CFA_RESTORE notes, swaps IP for SP
   in the mask so the old stack pointer is reloaded directly into SP,
   optionally folds LR into PC for a combined return, unwinds SP to just
   below the saved registers, pops the core registers, restores IP for
   interrupt handlers, applies the EH stack adjustment, and finally
   emits the return (restoring the pre-realignment SP from r0 for
   IS_STACKALIGN functions).  */
27664 /* Epilogue code for APCS frame. */
27666 arm_expand_epilogue_apcs_frame (bool really_return
)
27668 unsigned long func_type
;
27669 unsigned long saved_regs_mask
;
27672 int floats_from_frame
= 0;
27673 arm_stack_offsets
*offsets
;
27675 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27676 func_type
= arm_current_func_type ();
27678 /* Get frame offsets for ARM. */
27679 offsets
= arm_get_frame_offsets ();
27680 saved_regs_mask
= offsets
->saved_regs_mask
;
27682 /* Find the offset of the floating-point save area in the frame. */
27684 = (offsets
->saved_args
27685 + arm_compute_static_chain_stack_bytes ()
27688 /* Compute how many core registers saved and how far away the floats are. */
27689 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27690 if (saved_regs_mask
& (1 << i
))
27693 floats_from_frame
+= 4;
27696 if (TARGET_VFP_BASE
)
27699 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27701 /* The offset is from IP_REGNUM. */
27702 int saved_size
= arm_get_vfp_saved_size ();
27703 if (saved_size
> 0)
27706 floats_from_frame
+= saved_size
;
27707 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27708 hard_frame_pointer_rtx
,
27709 GEN_INT (-floats_from_frame
)));
27710 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27711 ip_rtx
, hard_frame_pointer_rtx
);
27714 /* Generate VFP register multi-pop. */
27715 start_reg
= FIRST_VFP_REGNUM
;
27717 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27718 /* Look for a case where a reg does not need restoring. */
27719 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27721 if (start_reg
!= i
)
27722 arm_emit_vfp_multi_reg_pop (start_reg
,
27723 (i
- start_reg
) / 2,
27724 gen_rtx_REG (SImode
,
27729 /* Restore the remaining regs that we have discovered (or possibly
27730 even all of them, if the conditional in the for loop never
27732 if (start_reg
!= i
)
27733 arm_emit_vfp_multi_reg_pop (start_reg
,
27734 (i
- start_reg
) / 2,
27735 gen_rtx_REG (SImode
, IP_REGNUM
));
27740 /* The frame pointer is guaranteed to be non-double-word aligned, as
27741 it is set to double-word-aligned old_stack_pointer - 4. */
27743 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27745 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27746 if (reg_needs_saving_p (i
))
27748 rtx addr
= gen_frame_mem (V2SImode
,
27749 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27751 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27752 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27753 gen_rtx_REG (V2SImode
, i
),
27759 /* saved_regs_mask should contain IP which contains old stack pointer
27760 at the time of activation creation. Since SP and IP are adjacent registers,
27761 we can restore the value directly into SP. */
27762 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27763 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27764 saved_regs_mask
|= (1 << SP_REGNUM
);
27766 /* There are two registers left in saved_regs_mask - LR and PC. We
27767 only need to restore LR (the return address), but to
27768 save time we can load it directly into PC, unless we need a
27769 special function exit sequence, or we are not really returning. */
27771 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27772 && !crtl
->calls_eh_return
)
27773 /* Delete LR from the register mask, so that LR on
27774 the stack is loaded into the PC in the register mask. */
27775 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27777 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27779 num_regs
= bit_count (saved_regs_mask
);
27780 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27783 emit_insn (gen_blockage ());
27784 /* Unwind the stack to just below the saved registers. */
27785 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27786 hard_frame_pointer_rtx
,
27787 GEN_INT (- 4 * num_regs
)));
27789 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27790 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27793 arm_emit_multi_reg_pop (saved_regs_mask
);
27795 if (IS_INTERRUPT (func_type
))
27797 /* Interrupt handlers will have pushed the
27798 IP onto the stack, so restore it now. */
27800 rtx addr
= gen_rtx_MEM (SImode
,
27801 gen_rtx_POST_INC (SImode
,
27802 stack_pointer_rtx
));
27803 set_mem_alias_set (addr
, get_frame_alias_set ());
27804 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27805 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27806 gen_rtx_REG (SImode
, IP_REGNUM
),
27810 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27813 if (crtl
->calls_eh_return
)
27814 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27816 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27818 if (IS_STACKALIGN (func_type
))
27819 /* Restore the original stack pointer. Before prologue, the stack was
27820 realigned and the original stack pointer saved in r0. For details,
27821 see comment in arm_expand_prologue. */
27822 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27824 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   arm_expand_epilogue: the general ARM/Thumb-2 epilogue expander
   (REALLY_RETURN is false for sibcalls).  Visible flow: naked /
   volatile-abort functions just get a simple_return; APCS frames are
   delegated to arm_expand_epilogue_apcs_frame; otherwise the local
   frame is released (via the frame pointer when one exists, with
   ARM-mode and Thumb-2 layouts handled separately, each followed by a
   blockage and a forced USE of SP so the adjustment survives), VFP
   registers are popped in prologue-matching groups, iWMMXt registers
   are popped with REG_CFA_RESTORE notes, the core registers are popped
   (folding LR into PC for a direct return when the conditions around
   fused lines 27993-28005 permit, or via ldrd-based pops when the tune
   prefers them), pretend args and the static chain are deallocated with
   CFA-restore notes for anonymous args, CMSE entry functions clear
   leaking registers and restore FPCXTNS, a PAC authentication (aut_nop)
   is emitted when enabled, and finally the EH stack adjustment /
   stack-realignment undo / simple_return are emitted for real
   returns.  */
27827 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27828 function is not a sibcall. */
27830 arm_expand_epilogue (bool really_return
)
27832 unsigned long func_type
;
27833 unsigned long saved_regs_mask
;
27837 arm_stack_offsets
*offsets
;
27839 func_type
= arm_current_func_type ();
27841 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27842 let output_return_instruction take care of instruction emission if any. */
27843 if (IS_NAKED (func_type
)
27844 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27847 emit_jump_insn (simple_return_rtx
);
27851 /* If we are throwing an exception, then we really must be doing a
27852 return, so we can't tail-call. */
27853 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27855 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27857 arm_expand_epilogue_apcs_frame (really_return
);
27861 /* Get frame offsets for ARM. */
27862 offsets
= arm_get_frame_offsets ();
27863 saved_regs_mask
= offsets
->saved_regs_mask
;
27864 num_regs
= bit_count (saved_regs_mask
);
27866 if (frame_pointer_needed
)
27869 /* Restore stack pointer if necessary. */
27872 /* In ARM mode, frame pointer points to first saved register.
27873 Restore stack pointer to last saved register. */
27874 amount
= offsets
->frame
- offsets
->saved_regs
;
27876 /* Force out any pending memory operations that reference stacked data
27877 before stack de-allocation occurs. */
27878 emit_insn (gen_blockage ());
27879 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27880 hard_frame_pointer_rtx
,
27881 GEN_INT (amount
)));
27882 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27884 hard_frame_pointer_rtx
);
27886 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27888 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27892 /* In Thumb-2 mode, the frame pointer points to the last saved
27894 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27897 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27898 hard_frame_pointer_rtx
,
27899 GEN_INT (amount
)));
27900 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27901 hard_frame_pointer_rtx
,
27902 hard_frame_pointer_rtx
);
27905 /* Force out any pending memory operations that reference stacked data
27906 before stack de-allocation occurs. */
27907 emit_insn (gen_blockage ());
27908 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27909 hard_frame_pointer_rtx
));
27910 arm_add_cfa_adjust_cfa_note (insn
, 0,
27912 hard_frame_pointer_rtx
);
27913 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27915 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27920 /* Pop off outgoing args and local frame to adjust stack pointer to
27921 last saved register. */
27922 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27926 /* Force out any pending memory operations that reference stacked data
27927 before stack de-allocation occurs. */
27928 emit_insn (gen_blockage ());
27929 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27931 GEN_INT (amount
)));
27932 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27933 stack_pointer_rtx
, stack_pointer_rtx
);
27934 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27936 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27940 if (TARGET_VFP_BASE
)
27942 /* Generate VFP register multi-pop. */
27943 int end_reg
= LAST_VFP_REGNUM
+ 1;
27945 /* Scan the registers in reverse order. We need to match
27946 any groupings made in the prologue and generate matching
27947 vldm operations. The need to match groups is because,
27948 unlike pop, vldm can only do consecutive regs. */
27949 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27950 /* Look for a case where a reg does not need restoring. */
27951 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27953 /* Restore the regs discovered so far (from reg+2 to
27955 if (end_reg
> i
+ 2)
27956 arm_emit_vfp_multi_reg_pop (i
+ 2,
27957 (end_reg
- (i
+ 2)) / 2,
27958 stack_pointer_rtx
);
27962 /* Restore the remaining regs that we have discovered (or possibly
27963 even all of them, if the conditional in the for loop never
27965 if (end_reg
> i
+ 2)
27966 arm_emit_vfp_multi_reg_pop (i
+ 2,
27967 (end_reg
- (i
+ 2)) / 2,
27968 stack_pointer_rtx
);
27972 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27973 if (reg_needs_saving_p (i
))
27976 rtx addr
= gen_rtx_MEM (V2SImode
,
27977 gen_rtx_POST_INC (SImode
,
27978 stack_pointer_rtx
));
27979 set_mem_alias_set (addr
, get_frame_alias_set ());
27980 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27981 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27982 gen_rtx_REG (V2SImode
, i
),
27984 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27985 stack_pointer_rtx
, stack_pointer_rtx
);
27988 if (saved_regs_mask
)
27991 bool return_in_pc
= false;
27993 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27994 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27995 && !IS_CMSE_ENTRY (func_type
)
27996 && !IS_STACKALIGN (func_type
)
27998 && crtl
->args
.pretend_args_size
== 0
27999 && saved_regs_mask
& (1 << LR_REGNUM
)
28000 && !crtl
->calls_eh_return
28001 && !arm_current_function_pac_enabled_p ())
28003 saved_regs_mask
&= ~(1 << LR_REGNUM
);
28004 saved_regs_mask
|= (1 << PC_REGNUM
);
28005 return_in_pc
= true;
28008 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
28010 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
28011 if (saved_regs_mask
& (1 << i
))
28013 rtx addr
= gen_rtx_MEM (SImode
,
28014 gen_rtx_POST_INC (SImode
,
28015 stack_pointer_rtx
));
28016 set_mem_alias_set (addr
, get_frame_alias_set ());
28018 if (i
== PC_REGNUM
)
28020 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
28021 XVECEXP (insn
, 0, 0) = ret_rtx
;
28022 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
28024 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
28025 insn
= emit_jump_insn (insn
);
28029 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
28031 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
28032 gen_rtx_REG (SImode
, i
),
28034 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
28036 stack_pointer_rtx
);
28043 && current_tune
->prefer_ldrd_strd
28044 && !optimize_function_for_size_p (cfun
))
28047 thumb2_emit_ldrd_pop (saved_regs_mask
);
28048 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
28049 arm_emit_ldrd_pop (saved_regs_mask
);
28051 arm_emit_multi_reg_pop (saved_regs_mask
);
28054 arm_emit_multi_reg_pop (saved_regs_mask
);
28062 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
28066 rtx dwarf
= NULL_RTX
;
28068 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28070 GEN_INT (amount
)));
28072 RTX_FRAME_RELATED_P (tmp
) = 1;
28074 if (cfun
->machine
->uses_anonymous_args
)
28076 /* Restore pretend args. Refer arm_expand_prologue on how to save
28077 pretend_args in stack. */
28078 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
28079 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
28080 for (j
= 0, i
= 0; j
< num_regs
; i
++)
28081 if (saved_regs_mask
& (1 << i
))
28083 rtx reg
= gen_rtx_REG (SImode
, i
);
28084 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
28087 REG_NOTES (tmp
) = dwarf
;
28089 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
28090 stack_pointer_rtx
, stack_pointer_rtx
);
28093 if (IS_CMSE_ENTRY (func_type
))
28095 /* CMSE_ENTRY always returns. */
28096 gcc_assert (really_return
);
28097 /* Clear all caller-saved regs that are not used to return. */
28098 cmse_nonsecure_entry_clear_before_return ();
28100 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28102 if (TARGET_HAVE_FPCXT_CMSE
)
28106 insn
= emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx
,
28107 GEN_INT (FPCXTNS_ENUM
)));
28108 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
28109 plus_constant (Pmode
, stack_pointer_rtx
, 4));
28110 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
28111 RTX_FRAME_RELATED_P (insn
) = 1;
28115 if (arm_current_function_pac_enabled_p ())
28116 emit_insn (gen_aut_nop ());
28118 if (!really_return
)
28121 if (crtl
->calls_eh_return
)
28122 emit_insn (gen_addsi3 (stack_pointer_rtx
,
28124 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
28126 if (IS_STACKALIGN (func_type
))
28127 /* Restore the original stack pointer. Before prologue, the stack was
28128 realigned and the original stack pointer saved in r0. For details,
28129 see comment in arm_expand_prologue. */
28130 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
28132 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction-garbled text (fused line numbers, split tokens,
   dropped lines) -- kept byte-identical; consult upstream
   gcc/config/arm/arm.cc before editing.

   thumb1_output_interwork: prints the assembly for the
   prologue_thumb1_interwork insn -- the ARM-mode entry stub of a Thumb
   function.  It emits `orr ip, pc, #1` / `bx ip` to switch to Thumb
   state, then a `.code 16` / `.thumb_func` STUB_NAME label
   (".real_start_of<name>") that Thumb-mode callers in the same file can
   branch to directly; the comment below notes the label name must match
   gas/config/tc-arm.c.  */
28135 /* Implementation of insn prologue_thumb1_interwork. This is the first
28136 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28139 thumb1_output_interwork (void)
28142 FILE *f
= asm_out_file
;
28144 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
28145 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
28147 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
28149 /* Generate code sequence to switch us into Thumb mode. */
28150 /* The .code 32 directive has already been emitted by
28151 ASM_DECLARE_FUNCTION_NAME. */
28152 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
28153 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
28155 /* Generate a label, so that the debugger will notice the
28156 change in instruction sets. This label is also used by
28157 the assembler to bypass the ARM code when this function
28158 is called from a Thumb encoded function elsewhere in the
28159 same file. Hence the definition of STUB_NAME here must
28160 agree with the definition in gas/config/tc-arm.c. */
28162 #define STUB_NAME ".real_start_of"
28164 fprintf (f
, "\t.code\t16\n");
28166 if (arm_dllexport_name_p (name
))
28167 name
= arm_strip_name_encoding (name
);
28169 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
28170 fprintf (f
, "\t.thumb_func\n");
28171 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
28176 /* Handle the case of a double word load into a low register from
28177 a computed memory address. The computed address may involve a
28178 register which is overwritten by the load. */
28180 thumb_load_double_from_address (rtx
*operands
)
28188 gcc_assert (REG_P (operands
[0]));
28189 gcc_assert (MEM_P (operands
[1]));
28191 /* Get the memory address. */
28192 addr
= XEXP (operands
[1], 0);
28194 /* Work out how the memory address is computed. */
28195 switch (GET_CODE (addr
))
28198 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28200 if (REGNO (operands
[0]) == REGNO (addr
))
28202 output_asm_insn ("ldr\t%H0, %2", operands
);
28203 output_asm_insn ("ldr\t%0, %1", operands
);
28207 output_asm_insn ("ldr\t%0, %1", operands
);
28208 output_asm_insn ("ldr\t%H0, %2", operands
);
28213 /* Compute <address> + 4 for the high order load. */
28214 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28216 output_asm_insn ("ldr\t%0, %1", operands
);
28217 output_asm_insn ("ldr\t%H0, %2", operands
);
28221 arg1
= XEXP (addr
, 0);
28222 arg2
= XEXP (addr
, 1);
28224 if (CONSTANT_P (arg1
))
28225 base
= arg2
, offset
= arg1
;
28227 base
= arg1
, offset
= arg2
;
28229 gcc_assert (REG_P (base
));
28231 /* Catch the case of <address> = <reg> + <reg> */
28232 if (REG_P (offset
))
28234 int reg_offset
= REGNO (offset
);
28235 int reg_base
= REGNO (base
);
28236 int reg_dest
= REGNO (operands
[0]);
28238 /* Add the base and offset registers together into the
28239 higher destination register. */
28240 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28241 reg_dest
+ 1, reg_base
, reg_offset
);
28243 /* Load the lower destination register from the address in
28244 the higher destination register. */
28245 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28246 reg_dest
, reg_dest
+ 1);
28248 /* Load the higher destination register from its own address
28250 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28251 reg_dest
+ 1, reg_dest
+ 1);
28255 /* Compute <address> + 4 for the high order load. */
28256 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28258 /* If the computed address is held in the low order register
28259 then load the high order register first, otherwise always
28260 load the low order register first. */
28261 if (REGNO (operands
[0]) == REGNO (base
))
28263 output_asm_insn ("ldr\t%H0, %2", operands
);
28264 output_asm_insn ("ldr\t%0, %1", operands
);
28268 output_asm_insn ("ldr\t%0, %1", operands
);
28269 output_asm_insn ("ldr\t%H0, %2", operands
);
28275 /* With no registers to worry about we can just load the value
28277 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28279 output_asm_insn ("ldr\t%H0, %2", operands
);
28280 output_asm_insn ("ldr\t%0, %1", operands
);
28284 gcc_unreachable ();
28291 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28296 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28297 std::swap (operands
[4], operands
[5]);
28299 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28300 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28304 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28305 std::swap (operands
[4], operands
[5]);
28306 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28307 std::swap (operands
[5], operands
[6]);
28308 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28309 std::swap (operands
[4], operands
[5]);
28311 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28312 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28316 gcc_unreachable ();
28322 /* Output a call-via instruction for thumb state. */
28324 thumb_call_via_reg (rtx reg
)
28326 int regno
= REGNO (reg
);
28329 gcc_assert (regno
< LR_REGNUM
);
28331 /* If we are in the normal text section we can use a single instance
28332 per compilation unit. If we are doing function sections, then we need
28333 an entry per section, since we can't rely on reachability. */
28334 if (in_section
== text_section
)
28336 thumb_call_reg_needed
= 1;
28338 if (thumb_call_via_label
[regno
] == NULL
)
28339 thumb_call_via_label
[regno
] = gen_label_rtx ();
28340 labelp
= thumb_call_via_label
+ regno
;
28344 if (cfun
->machine
->call_via
[regno
] == NULL
)
28345 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28346 labelp
= cfun
->machine
->call_via
+ regno
;
28349 output_asm_insn ("bl\t%a0", labelp
);
28353 /* Routines for generating rtl. */
28355 thumb_expand_cpymemqi (rtx
*operands
)
28357 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28358 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28359 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28360 HOST_WIDE_INT offset
= 0;
28364 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
28370 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
28376 rtx reg
= gen_reg_rtx (SImode
);
28377 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28378 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28385 rtx reg
= gen_reg_rtx (HImode
);
28386 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28387 plus_constant (Pmode
, in
,
28389 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28398 rtx reg
= gen_reg_rtx (QImode
);
28399 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28400 plus_constant (Pmode
, in
,
28402 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28409 thumb_reload_out_hi (rtx
*operands
)
28411 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28414 /* Return the length of a function name prefix
28415 that starts with the character 'c'. */
28417 arm_get_strip_length (int c
)
28421 ARM_NAME_ENCODING_LENGTHS
28426 /* Return a pointer to a function's name with any
28427 and all prefix encodings stripped from it. */
28429 arm_strip_name_encoding (const char *name
)
28433 while ((skip
= arm_get_strip_length (* name
)))
28439 /* If there is a '*' anywhere in the name's prefix, then
28440 emit the stripped name verbatim, otherwise prepend an
28441 underscore if leading underscores are being used. */
28443 arm_asm_output_labelref (FILE *stream
, const char *name
)
28448 while ((skip
= arm_get_strip_length (* name
)))
28450 verbatim
|= (*name
== '*');
28455 fputs (name
, stream
);
28457 asm_fprintf (stream
, "%U%s", name
);
28460 /* This function is used to emit an EABI tag and its associated value.
28461 We emit the numerical value of the tag in case the assembler does not
28462 support textual tags. (Eg gas prior to 2.20). If requested we include
28463 the tag name in a comment so that anyone reading the assembler output
28464 will know which tag is being set.
28466 This function is not static because arm-c.cc needs it too. */
28469 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28471 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28472 if (flag_verbose_asm
|| flag_debug_asm
)
28473 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28474 asm_fprintf (asm_out_file
, "\n");
28477 /* This function is used to print CPU tuning information as comment
28478 in assembler file. Pointers are not printed for now. */
28481 arm_print_tune_info (void)
28483 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
28484 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
28485 current_tune
->constant_limit
);
28486 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28487 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
28488 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28489 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
28490 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28491 "prefetch.l1_cache_size:\t%d\n",
28492 current_tune
->prefetch
.l1_cache_size
);
28493 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28494 "prefetch.l1_cache_line_size:\t%d\n",
28495 current_tune
->prefetch
.l1_cache_line_size
);
28496 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28497 "prefer_constant_pool:\t%d\n",
28498 (int) current_tune
->prefer_constant_pool
);
28499 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28500 "branch_cost:\t(s:speed, p:predictable)\n");
28501 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
28502 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
28503 current_tune
->branch_cost (false, false));
28504 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
28505 current_tune
->branch_cost (false, true));
28506 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
28507 current_tune
->branch_cost (true, false));
28508 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
28509 current_tune
->branch_cost (true, true));
28510 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28511 "prefer_ldrd_strd:\t%d\n",
28512 (int) current_tune
->prefer_ldrd_strd
);
28513 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28514 "logical_op_non_short_circuit:\t[%d,%d]\n",
28515 (int) current_tune
->logical_op_non_short_circuit_thumb
,
28516 (int) current_tune
->logical_op_non_short_circuit_arm
);
28517 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28518 "disparage_flag_setting_t16_encodings:\t%d\n",
28519 (int) current_tune
->disparage_flag_setting_t16_encodings
);
28520 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28521 "string_ops_prefer_neon:\t%d\n",
28522 (int) current_tune
->string_ops_prefer_neon
);
28523 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28524 "max_insns_inline_memset:\t%d\n",
28525 current_tune
->max_insns_inline_memset
);
28526 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
28527 current_tune
->fusible_ops
);
28528 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
28529 (int) current_tune
->sched_autopref
);
28532 /* The last set of target options used to emit .arch directives, etc. This
28533 could be a function-local static if it were not required to expose it as a
28534 root to the garbage collector. */
28535 static GTY(()) cl_target_option
*last_asm_targ_options
= NULL
;
28537 /* Print .arch and .arch_extension directives corresponding to the
28538 current architecture configuration. */
28540 arm_print_asm_arch_directives (FILE *stream
, cl_target_option
*targ_options
)
28542 arm_build_target build_target
;
28543 /* If the target options haven't changed since the last time we were called
28544 there is nothing to do. This should be sufficient to suppress the
28545 majority of redundant work. */
28546 if (last_asm_targ_options
== targ_options
)
28549 last_asm_targ_options
= targ_options
;
28551 build_target
.isa
= sbitmap_alloc (isa_num_bits
);
28552 arm_configure_build_target (&build_target
, targ_options
, false);
28554 if (build_target
.core_name
28555 && !bitmap_bit_p (build_target
.isa
, isa_bit_quirk_no_asmcpu
))
28557 const char* truncated_name
28558 = arm_rewrite_selected_cpu (build_target
.core_name
);
28559 asm_fprintf (stream
, "\t.cpu %s\n", truncated_name
);
28562 const arch_option
*arch
28563 = arm_parse_arch_option_name (all_architectures
, "-march",
28564 build_target
.arch_name
);
28565 auto_sbitmap
opt_bits (isa_num_bits
);
28569 if (strcmp (build_target
.arch_name
, "armv7ve") == 0)
28571 /* Keep backward compatability for assemblers which don't support
28572 armv7ve. Fortunately, none of the following extensions are reset
28573 by a .fpu directive. */
28574 asm_fprintf (stream
, "\t.arch armv7-a\n");
28575 asm_fprintf (stream
, "\t.arch_extension virt\n");
28576 asm_fprintf (stream
, "\t.arch_extension idiv\n");
28577 asm_fprintf (stream
, "\t.arch_extension sec\n");
28578 asm_fprintf (stream
, "\t.arch_extension mp\n");
28581 asm_fprintf (stream
, "\t.arch %s\n", build_target
.arch_name
);
28583 /* The .fpu directive will reset any architecture extensions from the
28584 assembler that relate to the fp/vector extensions. So put this out before
28585 any .arch_extension directives. */
28586 const char *fpu_name
= (TARGET_SOFT_FLOAT
28588 : arm_identify_fpu_from_isa (build_target
.isa
));
28589 asm_fprintf (stream
, "\t.fpu %s\n", fpu_name
);
28591 if (!arch
->common
.extensions
)
28594 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
28600 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
28602 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28603 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28604 floating point instructions is disabled. So the following check
28605 restricts the printing of ".arch_extension mve" and
28606 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28607 this special behaviour because the feature bit "mve" and
28608 "mve_float" are not part of "fpu bits", so they are not cleared
28609 when -mfloat-abi=soft (i.e nofp) but the marco TARGET_HAVE_MVE and
28610 TARGET_HAVE_MVE_FLOAT are disabled. */
28611 if ((bitmap_bit_p (opt_bits
, isa_bit_mve
) && !TARGET_HAVE_MVE
)
28612 || (bitmap_bit_p (opt_bits
, isa_bit_mve_float
)
28613 && !TARGET_HAVE_MVE_FLOAT
))
28616 /* If every feature bit of this option is set in the target ISA
28617 specification, print out the option name. However, don't print
28618 anything if all the bits are part of the FPU specification. */
28619 if (bitmap_subset_p (opt_bits
, build_target
.isa
)
28620 && !bitmap_subset_p (opt_bits
, isa_all_fpubits_internal
))
28621 asm_fprintf (stream
, "\t.arch_extension %s\n", opt
->name
);
28627 arm_file_start (void)
28630 bool pac
= (aarch_ra_sign_scope
!= AARCH_FUNCTION_NONE
);
28631 bool bti
= (aarch_enable_bti
== 1);
28633 arm_print_asm_arch_directives
28634 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28638 /* If we have a named cpu, but we the assembler does not support that
28639 name via .cpu, put out a cpu name attribute; but don't do this if the
28640 name starts with the fictitious prefix, 'generic'. */
28641 if (arm_active_target
.core_name
28642 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
)
28643 && !startswith (arm_active_target
.core_name
, "generic"))
28645 const char* truncated_name
28646 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
28647 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
))
28648 asm_fprintf (asm_out_file
, "\t.eabi_attribute 5, \"%s\"\n",
28652 if (print_tune_info
)
28653 arm_print_tune_info ();
28655 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
28656 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28658 if (TARGET_HARD_FLOAT_ABI
)
28659 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28661 /* Some of these attributes only apply when the corresponding features
28662 are used. However we don't have any easy way of figuring this out.
28663 Conservatively record the setting that would have been used. */
28665 if (flag_rounding_math
)
28666 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28668 if (!flag_unsafe_math_optimizations
)
28670 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28671 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28673 if (flag_signaling_nans
)
28674 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28676 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28677 flag_finite_math_only
? 1 : 3);
28679 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28680 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28681 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28682 flag_short_enums
? 1 : 2);
28684 /* Tag_ABI_optimization_goals. */
28687 else if (optimize
>= 2)
28693 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28695 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28698 if (arm_fp16_format
)
28699 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28700 (int) arm_fp16_format
);
28702 if (TARGET_HAVE_PACBTI
)
28704 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28705 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28707 else if (pac
|| bti
)
28709 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28710 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28714 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28716 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28718 if (arm_lang_output_object_attributes_hook
)
28719 arm_lang_output_object_attributes_hook();
28722 default_file_start ();
28726 arm_file_end (void)
28730 /* Just in case the last function output in the assembler had non-default
28731 architecture directives, we force the assembler state back to the default
28732 set, so that any 'calculated' build attributes are based on the default
28733 options rather than the special options for that function. */
28734 arm_print_asm_arch_directives
28735 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28737 if (NEED_INDICATE_EXEC_STACK
)
28738 /* Add .note.GNU-stack. */
28739 file_end_indicate_exec_stack ();
28741 if (! thumb_call_reg_needed
)
28744 switch_to_section (text_section
);
28745 asm_fprintf (asm_out_file
, "\t.code 16\n");
28746 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28748 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28750 rtx label
= thumb_call_via_label
[regno
];
28754 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28755 CODE_LABEL_NUMBER (label
));
28756 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28762 /* Symbols in the text segment can be accessed without indirecting via the
28763 constant pool; it may take an extra binary operation, but this is still
28764 faster than indirecting via memory. Don't do this when not optimizing,
28765 since we won't be calculating al of the offsets necessary to do this
28769 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28771 if (optimize
> 0 && TREE_CONSTANT (decl
))
28772 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28774 default_encode_section_info (decl
, rtl
, first
);
28776 #endif /* !ARM_PE */
28779 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28781 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28782 && !strcmp (prefix
, "L"))
28784 arm_ccfsm_state
= 0;
28785 arm_target_insn
= NULL
;
28787 default_internal_label (stream
, prefix
, labelno
);
28790 /* Define classes to generate code as RTL or output asm to a file.
28791 Using templates then allows to use the same code to output code
28792 sequences in the two formats. */
28793 class thumb1_const_rtl
28796 thumb1_const_rtl (rtx dst
) : dst (dst
) {}
28798 void mov (HOST_WIDE_INT val
)
28800 emit_set_insn (dst
, GEN_INT (val
));
28803 void add (HOST_WIDE_INT val
)
28805 emit_set_insn (dst
, gen_rtx_PLUS (SImode
, dst
, GEN_INT (val
)));
28808 void ashift (HOST_WIDE_INT shift
)
28810 emit_set_insn (dst
, gen_rtx_ASHIFT (SImode
, dst
, GEN_INT (shift
)));
28815 emit_set_insn (dst
, gen_rtx_NEG (SImode
, dst
));
28822 class thumb1_const_print
28825 thumb1_const_print (FILE *f
, int regno
)
28828 dst_regname
= reg_names
[regno
];
28831 void mov (HOST_WIDE_INT val
)
28833 asm_fprintf (t_file
, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28837 void add (HOST_WIDE_INT val
)
28839 asm_fprintf (t_file
, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28843 void ashift (HOST_WIDE_INT shift
)
28845 asm_fprintf (t_file
, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28846 dst_regname
, shift
);
28851 asm_fprintf (t_file
, "\trsbs\t%s, #0\n", dst_regname
);
28856 const char *dst_regname
;
28859 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28860 Avoid generating useless code when one of the bytes is zero. */
28863 thumb1_gen_const_int_1 (T dst
, HOST_WIDE_INT op1
)
28865 bool mov_done_p
= false;
28866 unsigned HOST_WIDE_INT val
= op1
;
28870 gcc_assert (op1
== trunc_int_for_mode (op1
, SImode
));
28878 /* For negative numbers with the first nine bits set, build the
28879 opposite of OP1, then negate it, it's generally shorter and not
28881 if ((val
& 0xFF800000) == 0xFF800000)
28883 thumb1_gen_const_int_1 (dst
, -op1
);
28888 /* In the general case, we need 7 instructions to build
28889 a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
28890 do better if VAL is small enough, or
28891 right-shiftable by a suitable amount. If the
28892 right-shift enables to encode at least one less byte,
28893 it's worth it: we save a adds and a lsls at the
28894 expense of a final lsls. */
28895 int final_shift
= number_of_first_bit_set (val
);
28897 int leading_zeroes
= clz_hwi (val
);
28898 int number_of_bytes_needed
28899 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
)
28900 / BITS_PER_UNIT
) + 1;
28901 int number_of_bytes_needed2
28902 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
- final_shift
)
28903 / BITS_PER_UNIT
) + 1;
28905 if (number_of_bytes_needed2
< number_of_bytes_needed
)
28906 val
>>= final_shift
;
28910 /* If we are in a very small range, we can use either a single movs
28916 unsigned HOST_WIDE_INT high
= val
- 255;
28924 if (final_shift
> 0)
28925 dst
.ashift (final_shift
);
28929 /* General case, emit upper 3 bytes as needed. */
28930 for (i
= 0; i
< 3; i
++)
28932 unsigned HOST_WIDE_INT byte
= (val
>> (8 * (3 - i
))) & 0xff;
28936 /* We are about to emit new bits, stop accumulating a
28937 shift amount, and left-shift only if we have already
28938 emitted some upper bits. */
28941 dst
.ashift (shift
);
28947 /* Stop accumulating shift amount since we've just
28948 emitted some bits. */
28958 /* Emit lower byte. */
28960 dst
.mov (val
& 0xff);
28963 dst
.ashift (shift
);
28965 dst
.add (val
& 0xff);
28968 if (final_shift
> 0)
28969 dst
.ashift (final_shift
);
28973 /* Proxies for thumb1.md, since the thumb1_const_print and
28974 thumb1_const_rtl classes are not exported. */
28976 thumb1_gen_const_int_rtl (rtx dst
, HOST_WIDE_INT op1
)
28978 thumb1_const_rtl
t (dst
);
28979 thumb1_gen_const_int_1 (t
, op1
);
28983 thumb1_gen_const_int_print (rtx dst
, HOST_WIDE_INT op1
)
28985 thumb1_const_print
t (asm_out_file
, REGNO (dst
));
28986 thumb1_gen_const_int_1 (t
, op1
);
28989 /* Output code to add DELTA to the first argument, and then jump
28990 to FUNCTION. Used for C++ multiple inheritance. */
28993 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28994 HOST_WIDE_INT
, tree function
)
28996 static int thunk_label
= 0;
28999 int mi_delta
= delta
;
29000 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
29002 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
29005 mi_delta
= - mi_delta
;
29007 final_start_function (emit_barrier (), file
, 1);
29011 int labelno
= thunk_label
++;
29012 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
29013 /* Thunks are entered in arm mode when available. */
29014 if (TARGET_THUMB1_ONLY
)
29016 /* push r3 so we can use it as a temporary. */
29017 /* TODO: Omit this save if r3 is not used. */
29018 fputs ("\tpush {r3}\n", file
);
29020 /* With -mpure-code, we cannot load the address from the
29021 constant pool: we build it explicitly. */
29022 if (target_pure_code
)
29024 fputs ("\tmovs\tr3, #:upper8_15:#", file
);
29025 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29026 fputc ('\n', file
);
29027 fputs ("\tlsls r3, #8\n", file
);
29028 fputs ("\tadds\tr3, #:upper0_7:#", file
);
29029 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29030 fputc ('\n', file
);
29031 fputs ("\tlsls r3, #8\n", file
);
29032 fputs ("\tadds\tr3, #:lower8_15:#", file
);
29033 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29034 fputc ('\n', file
);
29035 fputs ("\tlsls r3, #8\n", file
);
29036 fputs ("\tadds\tr3, #:lower0_7:#", file
);
29037 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29038 fputc ('\n', file
);
29041 fputs ("\tldr\tr3, ", file
);
29045 fputs ("\tldr\tr12, ", file
);
29048 if (!target_pure_code
)
29050 assemble_name (file
, label
);
29051 fputc ('\n', file
);
29056 /* If we are generating PIC, the ldr instruction below loads
29057 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29058 the address of the add + 8, so we have:
29060 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29063 Note that we have "+ 1" because some versions of GNU ld
29064 don't set the low bit of the result for R_ARM_REL32
29065 relocations against thumb function symbols.
29066 On ARMv6M this is +4, not +8. */
29067 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
29068 assemble_name (file
, labelpc
);
29069 fputs (":\n", file
);
29070 if (TARGET_THUMB1_ONLY
)
29072 /* This is 2 insns after the start of the thunk, so we know it
29073 is 4-byte aligned. */
29074 fputs ("\tadd\tr3, pc, r3\n", file
);
29075 fputs ("\tmov r12, r3\n", file
);
29078 fputs ("\tadd\tr12, pc, r12\n", file
);
29080 else if (TARGET_THUMB1_ONLY
)
29081 fputs ("\tmov r12, r3\n", file
);
29083 if (TARGET_THUMB1_ONLY
)
29085 if (mi_delta
> 255)
29087 /* With -mpure-code, we cannot load MI_DELTA from the
29088 constant pool: we build it explicitly. */
29089 if (target_pure_code
)
29091 thumb1_const_print
r3 (file
, 3);
29092 thumb1_gen_const_int_1 (r3
, mi_delta
);
29096 fputs ("\tldr\tr3, ", file
);
29097 assemble_name (file
, label
);
29098 fputs ("+4\n", file
);
29100 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
29101 mi_op
, this_regno
, this_regno
);
29103 else if (mi_delta
!= 0)
29105 /* Thumb1 unified syntax requires s suffix in instruction name when
29106 one of the operands is immediate. */
29107 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
29108 mi_op
, this_regno
, this_regno
,
29114 /* TODO: Use movw/movt for large constants when available. */
29115 while (mi_delta
!= 0)
29117 if ((mi_delta
& (3 << shift
)) == 0)
29121 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
29122 mi_op
, this_regno
, this_regno
,
29123 mi_delta
& (0xff << shift
));
29124 mi_delta
&= ~(0xff << shift
);
29131 if (TARGET_THUMB1_ONLY
)
29132 fputs ("\tpop\t{r3}\n", file
);
29134 fprintf (file
, "\tbx\tr12\n");
29136 /* With -mpure-code, we don't need to emit literals for the
29137 function address and delta since we emitted code to build
29139 if (!target_pure_code
)
29141 ASM_OUTPUT_ALIGN (file
, 2);
29142 assemble_name (file
, label
);
29143 fputs (":\n", file
);
29146 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29147 rtx tem
= XEXP (DECL_RTL (function
), 0);
29148 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29149 pipeline offset is four rather than eight. Adjust the offset
29151 tem
= plus_constant (GET_MODE (tem
), tem
,
29152 TARGET_THUMB1_ONLY
? -3 : -7);
29153 tem
= gen_rtx_MINUS (GET_MODE (tem
),
29155 gen_rtx_SYMBOL_REF (Pmode
,
29156 ggc_strdup (labelpc
)));
29157 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
29160 /* Output ".word .LTHUNKn". */
29161 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
29163 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
29164 assemble_integer (GEN_INT (mi_delta
), 4, BITS_PER_WORD
, 1);
29169 fputs ("\tb\t", file
);
29170 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
29171 if (NEED_PLT_RELOC
)
29172 fputs ("(PLT)", file
);
29173 fputc ('\n', file
);
29176 final_end_function ();
29179 /* MI thunk handling for TARGET_32BIT. */
29182 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
29183 HOST_WIDE_INT vcall_offset
, tree function
)
29185 const bool long_call_p
= arm_is_long_call_p (function
);
29187 /* On ARM, this_regno is R0 or R1 depending on
29188 whether the function returns an aggregate or not.
29190 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
29192 ? R1_REGNUM
: R0_REGNUM
);
29194 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
29195 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
29196 reload_completed
= 1;
29197 emit_note (NOTE_INSN_PROLOGUE_END
);
29199 /* Add DELTA to THIS_RTX. */
29201 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
29202 delta
, this_rtx
, this_rtx
, false);
29204 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29205 if (vcall_offset
!= 0)
29207 /* Load *THIS_RTX. */
29208 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
29209 /* Compute *THIS_RTX + VCALL_OFFSET. */
29210 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
29212 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29213 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
29214 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
29217 /* Generate a tail call to the target function. */
29218 if (!TREE_USED (function
))
29220 assemble_external (function
);
29221 TREE_USED (function
) = 1;
29223 rtx funexp
= XEXP (DECL_RTL (function
), 0);
29226 emit_move_insn (temp
, funexp
);
29229 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29230 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
29231 SIBLING_CALL_P (insn
) = 1;
29234 /* Indirect calls require a bit of fixup in PIC mode. */
29237 split_all_insns_noflow ();
29241 insn
= get_insns ();
29242 shorten_branches (insn
);
29243 final_start_function (insn
, file
, 1);
29244 final (insn
, file
, 1);
29245 final_end_function ();
29247 /* Stop pretending this is a post-reload pass. */
29248 reload_completed
= 0;
29251 /* Output code to add DELTA to the first argument, and then jump
29252 to FUNCTION. Used for C++ multiple inheritance. */
29255 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29256 HOST_WIDE_INT vcall_offset
, tree function
)
29258 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29260 assemble_start_function (thunk
, fnname
);
29262 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29264 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29265 assemble_end_function (thunk
, fnname
);
29269 arm_emit_vector_const (FILE *file
, rtx x
)
29272 const char * pattern
;
29274 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
29276 switch (GET_MODE (x
))
29278 case E_V2SImode
: pattern
= "%08x"; break;
29279 case E_V4HImode
: pattern
= "%04x"; break;
29280 case E_V8QImode
: pattern
= "%02x"; break;
29281 default: gcc_unreachable ();
29284 fprintf (file
, "0x");
29285 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
29289 element
= CONST_VECTOR_ELT (x
, i
);
29290 fprintf (file
, pattern
, INTVAL (element
));
29296 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29297 HFmode constant pool entries are actually loaded with ldr. */
29299 arm_emit_fp16_const (rtx c
)
29303 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
29304 if (WORDS_BIG_ENDIAN
)
29305 assemble_zeros (2);
29306 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
29307 if (!WORDS_BIG_ENDIAN
)
29308 assemble_zeros (2);
29312 arm_output_load_gr (rtx
*operands
)
29319 if (!MEM_P (operands
[1])
29320 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
29321 || !REG_P (reg
= XEXP (sum
, 0))
29322 || !CONST_INT_P (offset
= XEXP (sum
, 1))
29323 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
29324 return "wldrw%?\t%0, %1";
29326 /* Fix up an out-of-range load of a GR register. */
29327 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
29328 wcgr
= operands
[0];
29330 output_asm_insn ("ldr%?\t%0, %1", operands
);
29332 operands
[0] = wcgr
;
29334 output_asm_insn ("tmcr%?\t%0, %1", operands
);
29335 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
29340 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29342 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29343 named arg and all anonymous args onto the stack.
29344 XXX I know the prologue shouldn't be pushing registers, but it is faster
29348 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
29349 const function_arg_info
&arg
,
29351 int second_time ATTRIBUTE_UNUSED
)
29353 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
29356 cfun
->machine
->uses_anonymous_args
= 1;
29357 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
29359 nregs
= pcum
->aapcs_ncrn
;
29360 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl
))
29363 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
29364 if (res
< 0 && warn_psabi
)
29365 inform (input_location
, "parameter passing for argument of "
29366 "type %qT changed in GCC 7.1", arg
.type
);
29370 if (res
> 1 && warn_psabi
)
29371 inform (input_location
,
29372 "parameter passing for argument of type "
29373 "%qT changed in GCC 9.1", arg
.type
);
29378 nregs
= pcum
->nregs
;
29380 if (nregs
< NUM_ARG_REGS
)
29381 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
29384 /* We can't rely on the caller doing the proper promotion when
29385 using APCS or ATPCS. */
29388 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
29390 return !TARGET_AAPCS_BASED
;
29393 static machine_mode
29394 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
29396 int *punsignedp ATTRIBUTE_UNUSED
,
29397 const_tree fntype ATTRIBUTE_UNUSED
,
29398 int for_return ATTRIBUTE_UNUSED
)
29400 if (GET_MODE_CLASS (mode
) == MODE_INT
29401 && GET_MODE_SIZE (mode
) < 4)
29409 arm_default_short_enums (void)
29411 return ARM_DEFAULT_SHORT_ENUMS
;
29415 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29418 arm_align_anon_bitfield (void)
29420 return TARGET_AAPCS_BASED
;
29424 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29427 arm_cxx_guard_type (void)
29429 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
29433 /* The EABI says test the least significant bit of a guard variable. */
29436 arm_cxx_guard_mask_bit (void)
29438 return TARGET_AAPCS_BASED
;
29442 /* The EABI specifies that all array cookies are 8 bytes long. */
29445 arm_get_cookie_size (tree type
)
29449 if (!TARGET_AAPCS_BASED
)
29450 return default_cxx_get_cookie_size (type
);
29452 size
= build_int_cst (sizetype
, 8);
29457 /* The EABI says that array cookies should also contain the element size. */
29460 arm_cookie_has_size (void)
29462 return TARGET_AAPCS_BASED
;
29466 /* The EABI says constructors and destructors should return a pointer to
29467 the object constructed/destroyed. */
29470 arm_cxx_cdtor_returns_this (void)
29472 return TARGET_AAPCS_BASED
;
29475 /* The EABI says that an inline function may never be the key
29479 arm_cxx_key_method_may_be_inline (void)
29481 return !TARGET_AAPCS_BASED
;
29485 arm_cxx_determine_class_data_visibility (tree decl
)
29487 if (!TARGET_AAPCS_BASED
29488 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
29491 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29492 is exported. However, on systems without dynamic vague linkage,
29493 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29494 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
29495 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
29497 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
29498 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
29502 arm_cxx_class_data_always_comdat (void)
29504 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29505 vague linkage if the class has no key function. */
29506 return !TARGET_AAPCS_BASED
;
29510 /* The EABI says __aeabi_atexit should be used to register static
29514 arm_cxx_use_aeabi_atexit (void)
29516 return TARGET_AAPCS_BASED
;
29521 arm_set_return_address (rtx source
, rtx scratch
)
29523 arm_stack_offsets
*offsets
;
29524 HOST_WIDE_INT delta
;
29526 unsigned long saved_regs
;
29528 offsets
= arm_get_frame_offsets ();
29529 saved_regs
= offsets
->saved_regs_mask
;
29531 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
29532 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29535 if (frame_pointer_needed
)
29536 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
29539 /* LR will be the first saved register. */
29540 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
29545 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
29546 GEN_INT (delta
& ~4095)));
29551 addr
= stack_pointer_rtx
;
29553 addr
= plus_constant (Pmode
, addr
, delta
);
29556 /* The store needs to be marked to prevent DSE from deleting
29557 it as dead if it is based on fp. */
29558 mem
= gen_frame_mem (Pmode
, addr
);
29559 MEM_VOLATILE_P (mem
) = true;
29560 emit_move_insn (mem
, source
);
29566 thumb_set_return_address (rtx source
, rtx scratch
)
29568 arm_stack_offsets
*offsets
;
29569 HOST_WIDE_INT delta
;
29570 HOST_WIDE_INT limit
;
29573 unsigned long mask
;
29577 offsets
= arm_get_frame_offsets ();
29578 mask
= offsets
->saved_regs_mask
;
29579 if (mask
& (1 << LR_REGNUM
))
29582 /* Find the saved regs. */
29583 if (frame_pointer_needed
)
29585 delta
= offsets
->soft_frame
- offsets
->saved_args
;
29586 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29592 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29595 /* Allow for the stack frame. */
29596 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29598 /* The link register is always the first saved register. */
29601 /* Construct the address. */
29602 addr
= gen_rtx_REG (SImode
, reg
);
29605 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29606 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29610 addr
= plus_constant (Pmode
, addr
, delta
);
29612 /* The store needs to be marked to prevent DSE from deleting
29613 it as dead if it is based on fp. */
29614 mem
= gen_frame_mem (Pmode
, addr
);
29615 MEM_VOLATILE_P (mem
) = true;
29616 emit_move_insn (mem
, source
);
29619 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29622 /* Implements target hook vector_mode_supported_p. */
29624 arm_vector_mode_supported_p (machine_mode mode
)
29626 /* Neon also supports V2SImode, etc. listed in the clause below. */
29627 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29628 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
29629 || mode
== V2DImode
|| mode
== V8HFmode
|| mode
== V4BFmode
29630 || mode
== V8BFmode
))
29633 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29634 && ((mode
== V2SImode
)
29635 || (mode
== V4HImode
)
29636 || (mode
== V8QImode
)))
29639 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29640 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29641 || mode
== V2HAmode
))
29644 if (TARGET_HAVE_MVE
29645 && (VALID_MVE_SI_MODE (mode
) || VALID_MVE_PRED_MODE (mode
)))
29648 if (TARGET_HAVE_MVE_FLOAT
29649 && (mode
== V2DFmode
|| mode
== V4SFmode
|| mode
== V8HFmode
))
29655 /* Implements target hook array_mode_supported_p. */
29658 arm_array_mode_supported_p (machine_mode mode
,
29659 unsigned HOST_WIDE_INT nelems
)
29661 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29662 for now, as the lane-swapping logic needs to be extended in the expanders.
29663 See PR target/82518. */
29664 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
29665 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29666 && (nelems
>= 2 && nelems
<= 4))
29669 if (TARGET_HAVE_MVE
&& !BYTES_BIG_ENDIAN
29670 && VALID_MVE_MODE (mode
) && (nelems
== 2 || nelems
== 4))
29676 /* Use the option -mvectorize-with-neon-double to override the use of quardword
29677 registers when autovectorizing for Neon, at least until multiple vector
29678 widths are supported properly by the middle-end. */
29680 static machine_mode
29681 arm_preferred_simd_mode (scalar_mode mode
)
29687 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HFmode
: V8HFmode
;
29689 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29691 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29693 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29695 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29697 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29704 if (TARGET_REALLY_IWMMXT
)
29717 if (TARGET_HAVE_MVE
)
29730 if (TARGET_HAVE_MVE_FLOAT
)
29744 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29746 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29747 using r0-r4 for function arguments, r7 for the stack frame and don't have
29748 enough left over to do doubleword arithmetic. For Thumb-2 all the
29749 potentially problematic instructions accept high registers so this is not
29750 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29751 that require many low registers. */
29753 arm_class_likely_spilled_p (reg_class_t rclass
)
29755 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29756 || rclass
== CC_REG
)
29759 return default_class_likely_spilled_p (rclass
);
29762 /* Implements target hook small_register_classes_for_mode_p. */
29764 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29766 return TARGET_THUMB1
;
29769 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29770 ARM insns and therefore guarantee that the shift count is modulo 256.
29771 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29772 guarantee no particular behavior for out-of-range counts. */
29774 static unsigned HOST_WIDE_INT
29775 arm_shift_truncation_mask (machine_mode mode
)
29777 return mode
== SImode
? 255 : 0;
29781 /* Map internal gcc register numbers to DWARF2 register numbers. */
29784 arm_debugger_regno (unsigned int regno
)
29789 if (IS_VFP_REGNUM (regno
))
29791 /* See comment in arm_dwarf_register_span. */
29792 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29793 return 64 + regno
- FIRST_VFP_REGNUM
;
29795 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29798 if (IS_IWMMXT_GR_REGNUM (regno
))
29799 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29801 if (IS_IWMMXT_REGNUM (regno
))
29802 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29804 if (IS_PAC_REGNUM (regno
))
29805 return DWARF_PAC_REGNUM
;
29807 return DWARF_FRAME_REGISTERS
;
29810 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29811 GCC models tham as 64 32-bit registers, so we need to describe this to
29812 the DWARF generation code. Other registers can use the default. */
29814 arm_dwarf_register_span (rtx rtl
)
29822 regno
= REGNO (rtl
);
29823 if (!IS_VFP_REGNUM (regno
))
29826 /* XXX FIXME: The EABI defines two VFP register ranges:
29827 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29829 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29830 corresponding D register. Until GDB supports this, we shall use the
29831 legacy encodings. We also use these encodings for D0-D15 for
29832 compatibility with older debuggers. */
29833 mode
= GET_MODE (rtl
);
29834 if (GET_MODE_SIZE (mode
) < 8)
29837 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29839 nregs
= GET_MODE_SIZE (mode
) / 4;
29840 for (i
= 0; i
< nregs
; i
+= 2)
29841 if (TARGET_BIG_END
)
29843 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29844 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29848 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29849 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29854 nregs
= GET_MODE_SIZE (mode
) / 8;
29855 for (i
= 0; i
< nregs
; i
++)
29856 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29859 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29862 #if ARM_UNWIND_INFO
29863 /* Emit unwind directives for a store-multiple instruction or stack pointer
29864 push during alignment.
29865 These should only ever be generated by the function prologue code, so
29866 expect them to have a particular form.
29867 The store-multiple instruction sometimes pushes pc as the last register,
29868 although it should not be tracked into unwind information, or for -Os
29869 sometimes pushes some dummy registers before first register that needs
29870 to be tracked in unwind information; such dummy registers are there just
29871 to avoid separate stack adjustment, and will not be restored in the
29875 arm_unwind_emit_sequence (FILE * out_file
, rtx p
)
29878 HOST_WIDE_INT offset
;
29879 HOST_WIDE_INT nregs
;
29883 unsigned padfirst
= 0, padlast
= 0;
29886 e
= XVECEXP (p
, 0, 0);
29887 gcc_assert (GET_CODE (e
) == SET
);
29889 /* First insn will adjust the stack pointer. */
29890 gcc_assert (GET_CODE (e
) == SET
29891 && REG_P (SET_DEST (e
))
29892 && REGNO (SET_DEST (e
)) == SP_REGNUM
29893 && GET_CODE (SET_SRC (e
)) == PLUS
);
29895 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29896 nregs
= XVECLEN (p
, 0) - 1;
29897 gcc_assert (nregs
);
29899 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29900 if (reg
< 16 || IS_PAC_REGNUM (reg
))
29902 /* For -Os dummy registers can be pushed at the beginning to
29903 avoid separate stack pointer adjustment. */
29904 e
= XVECEXP (p
, 0, 1);
29905 e
= XEXP (SET_DEST (e
), 0);
29906 if (GET_CODE (e
) == PLUS
)
29907 padfirst
= INTVAL (XEXP (e
, 1));
29908 gcc_assert (padfirst
== 0 || optimize_size
);
29909 /* The function prologue may also push pc, but not annotate it as it is
29910 never restored. We turn this into a stack pointer adjustment. */
29911 e
= XVECEXP (p
, 0, nregs
);
29912 e
= XEXP (SET_DEST (e
), 0);
29913 if (GET_CODE (e
) == PLUS
)
29914 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29916 padlast
= offset
- 4;
29917 gcc_assert (padlast
== 0 || padlast
== 4);
29919 fprintf (out_file
, "\t.pad #4\n");
29921 fprintf (out_file
, "\t.save {");
29923 else if (IS_VFP_REGNUM (reg
))
29926 fprintf (out_file
, "\t.vsave {");
29929 /* Unknown register type. */
29930 gcc_unreachable ();
29932 /* If the stack increment doesn't match the size of the saved registers,
29933 something has gone horribly wrong. */
29934 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29938 /* The remaining insns will describe the stores. */
29939 for (i
= 1; i
<= nregs
; i
++)
29941 /* Expect (set (mem <addr>) (reg)).
29942 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29943 e
= XVECEXP (p
, 0, i
);
29944 gcc_assert (GET_CODE (e
) == SET
29945 && MEM_P (SET_DEST (e
))
29946 && REG_P (SET_SRC (e
)));
29948 reg
= REGNO (SET_SRC (e
));
29949 gcc_assert (reg
>= lastreg
);
29952 fprintf (out_file
, ", ");
29953 /* We can't use %r for vfp because we need to use the
29954 double precision register names. */
29955 if (IS_VFP_REGNUM (reg
))
29956 asm_fprintf (out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29957 else if (IS_PAC_REGNUM (reg
))
29958 asm_fprintf (asm_out_file
, "ra_auth_code");
29960 asm_fprintf (out_file
, "%r", reg
);
29964 /* Check that the addresses are consecutive. */
29965 e
= XEXP (SET_DEST (e
), 0);
29966 if (GET_CODE (e
) == PLUS
)
29967 gcc_assert (REG_P (XEXP (e
, 0))
29968 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29969 && CONST_INT_P (XEXP (e
, 1))
29970 && offset
== INTVAL (XEXP (e
, 1)));
29974 && REGNO (e
) == SP_REGNUM
);
29975 offset
+= reg_size
;
29978 fprintf (out_file
, "}\n");
29980 fprintf (out_file
, "\t.pad #%d\n", padfirst
);
29983 /* Emit unwind directives for a SET. */
29986 arm_unwind_emit_set (FILE * out_file
, rtx p
)
29994 switch (GET_CODE (e0
))
29997 /* Pushing a single register. */
29998 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29999 || !REG_P (XEXP (XEXP (e0
, 0), 0))
30000 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
30003 asm_fprintf (out_file
, "\t.save ");
30004 if (IS_VFP_REGNUM (REGNO (e1
)))
30005 asm_fprintf(out_file
, "{d%d}\n",
30006 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
30008 asm_fprintf(out_file
, "{%r}\n", REGNO (e1
));
30012 if (REGNO (e0
) == SP_REGNUM
)
30014 /* A stack increment. */
30015 if (GET_CODE (e1
) != PLUS
30016 || !REG_P (XEXP (e1
, 0))
30017 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
30018 || !CONST_INT_P (XEXP (e1
, 1)))
30021 asm_fprintf (out_file
, "\t.pad #%wd\n",
30022 -INTVAL (XEXP (e1
, 1)));
30024 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
30026 HOST_WIDE_INT offset
;
30028 if (GET_CODE (e1
) == PLUS
)
30030 if (!REG_P (XEXP (e1
, 0))
30031 || !CONST_INT_P (XEXP (e1
, 1)))
30033 reg
= REGNO (XEXP (e1
, 0));
30034 offset
= INTVAL (XEXP (e1
, 1));
30035 asm_fprintf (out_file
, "\t.setfp %r, %r, #%wd\n",
30036 HARD_FRAME_POINTER_REGNUM
, reg
,
30039 else if (REG_P (e1
))
30042 asm_fprintf (out_file
, "\t.setfp %r, %r\n",
30043 HARD_FRAME_POINTER_REGNUM
, reg
);
30048 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
30050 /* Move from sp to reg. */
30051 asm_fprintf (out_file
, "\t.movsp %r\n", REGNO (e0
));
30053 else if (GET_CODE (e1
) == PLUS
30054 && REG_P (XEXP (e1
, 0))
30055 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
30056 && CONST_INT_P (XEXP (e1
, 1)))
30058 /* Set reg to offset from sp. */
30059 asm_fprintf (out_file
, "\t.movsp %r, #%d\n",
30060 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
30062 else if (REGNO (e0
) == IP_REGNUM
&& arm_current_function_pac_enabled_p ())
30064 if (cfun
->machine
->pacspval_needed
)
30065 asm_fprintf (out_file
, "\t.pacspval\n");
30077 /* Emit unwind directives for the given insn. */
30080 arm_unwind_emit (FILE * out_file
, rtx_insn
*insn
)
30083 bool handled_one
= false;
30085 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30088 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30089 && (TREE_NOTHROW (current_function_decl
)
30090 || crtl
->all_throwers_are_sibcalls
))
30093 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
30096 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
30098 switch (REG_NOTE_KIND (note
))
30100 case REG_FRAME_RELATED_EXPR
:
30101 pat
= XEXP (note
, 0);
30104 case REG_CFA_REGISTER
:
30105 pat
= XEXP (note
, 0);
30108 pat
= PATTERN (insn
);
30109 if (GET_CODE (pat
) == PARALLEL
)
30110 pat
= XVECEXP (pat
, 0, 0);
30113 /* Only emitted for IS_STACKALIGN re-alignment. */
30118 src
= SET_SRC (pat
);
30119 dest
= SET_DEST (pat
);
30121 gcc_assert (src
== stack_pointer_rtx
30122 || IS_PAC_REGNUM (REGNO (src
)));
30123 reg
= REGNO (dest
);
30125 if (IS_PAC_REGNUM (REGNO (src
)))
30126 arm_unwind_emit_set (out_file
, PATTERN (insn
));
30128 asm_fprintf (out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30131 handled_one
= true;
30134 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
30135 to get correct dwarf information for shrink-wrap. We should not
30136 emit unwind information for it because these are used either for
30137 pretend arguments or notes to adjust sp and restore registers from
30139 case REG_CFA_DEF_CFA
:
30140 case REG_CFA_ADJUST_CFA
:
30141 case REG_CFA_RESTORE
:
30144 case REG_CFA_EXPRESSION
:
30145 case REG_CFA_OFFSET
:
30146 /* ??? Only handling here what we actually emit. */
30147 gcc_unreachable ();
30155 pat
= PATTERN (insn
);
30158 switch (GET_CODE (pat
))
30161 arm_unwind_emit_set (out_file
, pat
);
30165 /* Store multiple. */
30166 arm_unwind_emit_sequence (out_file
, pat
);
30175 /* Output a reference from a function exception table to the type_info
30176 object X. The EABI specifies that the symbol should be relocated by
30177 an R_ARM_TARGET2 relocation. */
30180 arm_output_ttype (rtx x
)
30182 fputs ("\t.word\t", asm_out_file
);
30183 output_addr_const (asm_out_file
, x
);
30184 /* Use special relocations for symbol references. */
30185 if (!CONST_INT_P (x
))
30186 fputs ("(TARGET2)", asm_out_file
);
30187 fputc ('\n', asm_out_file
);
30192 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30195 arm_asm_emit_except_personality (rtx personality
)
30197 fputs ("\t.personality\t", asm_out_file
);
30198 output_addr_const (asm_out_file
, personality
);
30199 fputc ('\n', asm_out_file
);
30201 #endif /* ARM_UNWIND_INFO */
30203 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30206 arm_asm_init_sections (void)
30208 #if ARM_UNWIND_INFO
30209 exception_section
= get_unnamed_section (0, output_section_asm_op
,
30211 #endif /* ARM_UNWIND_INFO */
30213 #ifdef OBJECT_FORMAT_ELF
30214 if (target_pure_code
)
30215 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
30219 /* Output unwind directives for the start/end of a function. */
30222 arm_output_fn_unwind (FILE * f
, bool prologue
)
30224 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30228 fputs ("\t.fnstart\n", f
);
30231 /* If this function will never be unwound, then mark it as such.
30232 The came condition is used in arm_unwind_emit to suppress
30233 the frame annotations. */
30234 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30235 && (TREE_NOTHROW (current_function_decl
)
30236 || crtl
->all_throwers_are_sibcalls
))
30237 fputs("\t.cantunwind\n", f
);
30239 fputs ("\t.fnend\n", f
);
30244 arm_emit_tls_decoration (FILE *fp
, rtx x
)
30246 enum tls_reloc reloc
;
30249 val
= XVECEXP (x
, 0, 0);
30250 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
30252 output_addr_const (fp
, val
);
30257 fputs ("(tlsgd)", fp
);
30259 case TLS_GD32_FDPIC
:
30260 fputs ("(tlsgd_fdpic)", fp
);
30263 fputs ("(tlsldm)", fp
);
30265 case TLS_LDM32_FDPIC
:
30266 fputs ("(tlsldm_fdpic)", fp
);
30269 fputs ("(tlsldo)", fp
);
30272 fputs ("(gottpoff)", fp
);
30274 case TLS_IE32_FDPIC
:
30275 fputs ("(gottpoff_fdpic)", fp
);
30278 fputs ("(tpoff)", fp
);
30281 fputs ("(tlsdesc)", fp
);
30284 gcc_unreachable ();
30293 fputs (" + (. - ", fp
);
30294 output_addr_const (fp
, XVECEXP (x
, 0, 2));
30295 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30296 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
30297 output_addr_const (fp
, XVECEXP (x
, 0, 3));
30307 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30310 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
30312 gcc_assert (size
== 4);
30313 fputs ("\t.word\t", file
);
30314 output_addr_const (file
, x
);
30315 fputs ("(tlsldo)", file
);
30318 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30321 arm_output_addr_const_extra (FILE *fp
, rtx x
)
30323 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
30324 return arm_emit_tls_decoration (fp
, x
);
30325 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
30328 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
30330 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
30331 assemble_name_raw (fp
, label
);
30335 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
30337 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
30341 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30345 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
30347 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30351 output_addr_const (fp
, XVECEXP (x
, 0, 1));
30355 else if (GET_CODE (x
) == CONST_VECTOR
)
30356 return arm_emit_vector_const (fp
, x
);
30361 /* Output assembly for a shift instruction.
30362 SET_FLAGS determines how the instruction modifies the condition codes.
30363 0 - Do not set condition codes.
30364 1 - Set condition codes.
30365 2 - Use smallest instruction. */
30367 arm_output_shift(rtx
* operands
, int set_flags
)
30370 static const char flag_chars
[3] = {'?', '.', '!'};
30375 c
= flag_chars
[set_flags
];
30376 shift
= shift_op(operands
[3], &val
);
30380 operands
[2] = GEN_INT(val
);
30381 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
30384 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
30386 output_asm_insn (pattern
, operands
);
30390 /* Output assembly for a WMMX immediate shift instruction. */
30392 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
30394 int shift
= INTVAL (operands
[2]);
30396 machine_mode opmode
= GET_MODE (operands
[0]);
30398 gcc_assert (shift
>= 0);
30400 /* If the shift value in the register versions is > 63 (for D qualifier),
30401 31 (for W qualifier) or 15 (for H qualifier). */
30402 if (((opmode
== V4HImode
) && (shift
> 15))
30403 || ((opmode
== V2SImode
) && (shift
> 31))
30404 || ((opmode
== DImode
) && (shift
> 63)))
30408 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30409 output_asm_insn (templ
, operands
);
30410 if (opmode
== DImode
)
30412 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
30413 output_asm_insn (templ
, operands
);
30418 /* The destination register will contain all zeros. */
30419 sprintf (templ
, "wzero\t%%0");
30420 output_asm_insn (templ
, operands
);
30425 if ((opmode
== DImode
) && (shift
> 32))
30427 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30428 output_asm_insn (templ
, operands
);
30429 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
30430 output_asm_insn (templ
, operands
);
30434 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
30435 output_asm_insn (templ
, operands
);
30440 /* Output assembly for a WMMX tinsr instruction. */
30442 arm_output_iwmmxt_tinsr (rtx
*operands
)
30444 int mask
= INTVAL (operands
[3]);
30447 int units
= mode_nunits
[GET_MODE (operands
[0])];
30448 gcc_assert ((mask
& (mask
- 1)) == 0);
30449 for (i
= 0; i
< units
; ++i
)
30451 if ((mask
& 0x01) == 1)
30457 gcc_assert (i
< units
);
30459 switch (GET_MODE (operands
[0]))
30462 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
30465 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
30468 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
30471 gcc_unreachable ();
30474 output_asm_insn (templ
, operands
);
30479 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30480 Responsible for the handling of switch statements in arm. */
30482 arm_output_casesi (rtx
*operands
)
30485 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30486 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30487 output_asm_insn ("cmp\t%0, %1", operands
);
30488 output_asm_insn ("bhi\t%l3", operands
);
30489 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
30490 switch (GET_MODE (diff_vec
))
30493 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30494 output_asm_insn ("ldrb\t%4, [%5, %0]", operands
);
30496 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands
);
30497 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30500 if (REGNO (operands
[4]) != REGNO (operands
[5]))
30502 output_asm_insn ("add\t%4, %0, %0", operands
);
30503 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30504 output_asm_insn ("ldrh\t%4, [%5, %4]", operands
);
30506 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands
);
30510 output_asm_insn ("add\t%4, %5, %0", operands
);
30511 if (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
)
30512 output_asm_insn ("ldrh\t%4, [%4, %0]", operands
);
30514 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands
);
30516 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands
);
30521 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands
);
30522 output_asm_insn ("add\t%|pc, %|pc, %4", operands
);
30525 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands
);
30528 gcc_unreachable ();
30530 assemble_label (asm_out_file
, label
);
30531 output_asm_insn ("nop", operands
);
30535 /* Output a Thumb-1 casesi dispatch sequence. */
30537 thumb1_output_casesi (rtx
*operands
)
30539 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
30541 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30543 switch (GET_MODE(diff_vec
))
30546 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30547 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30549 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30550 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30552 return "bl\t%___gnu_thumb1_case_si";
30554 gcc_unreachable ();
30558 /* Output a Thumb-2 casesi instruction. */
30560 thumb2_output_casesi (rtx
*operands
)
30562 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30564 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30566 output_asm_insn ("cmp\t%0, %1", operands
);
30567 output_asm_insn ("bhi\t%l3", operands
);
30568 switch (GET_MODE(diff_vec
))
30571 return "tbb\t[%|pc, %0]";
30573 return "tbh\t[%|pc, %0, lsl #1]";
30577 output_asm_insn ("adr\t%4, %l2", operands
);
30578 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
30579 output_asm_insn ("add\t%4, %4, %5", operands
);
30584 output_asm_insn ("adr\t%4, %l2", operands
);
30585 return "ldr\t%|pc, [%4, %0, lsl #2]";
30588 gcc_unreachable ();
30592 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30593 per-core tuning structs. */
30595 arm_issue_rate (void)
30597 return current_tune
->issue_rate
;
30600 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30602 arm_sched_variable_issue (FILE *, int, rtx_insn
*insn
, int more
)
30604 if (DEBUG_INSN_P (insn
))
30607 rtx_code code
= GET_CODE (PATTERN (insn
));
30608 if (code
== USE
|| code
== CLOBBER
)
30611 if (get_attr_type (insn
) == TYPE_NO_INSN
)
30617 /* Return how many instructions should scheduler lookahead to choose the
30620 arm_first_cycle_multipass_dfa_lookahead (void)
30622 int issue_rate
= arm_issue_rate ();
30624 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
30627 /* Enable modeling of L2 auto-prefetcher. */
30629 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
30631 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
30635 arm_mangle_type (const_tree type
)
30637 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30638 has to be managled as if it is in the "std" namespace. */
30639 if (TARGET_AAPCS_BASED
30640 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
30641 return "St9__va_list";
30643 /* Half-precision floating point types. */
30644 if (SCALAR_FLOAT_TYPE_P (type
) && TYPE_PRECISION (type
) == 16)
30646 if (TYPE_MAIN_VARIANT (type
) == float16_type_node
)
30648 if (TYPE_MODE (type
) == BFmode
)
30654 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30656 if (TYPE_NAME (type
) != NULL
)
30657 return arm_mangle_builtin_type (type
);
30659 /* Use the default mangling. */
30663 /* Order of allocation of core registers for Thumb: this allocation is
30664 written over the corresponding initial entries of the array
30665 initialized with REG_ALLOC_ORDER. We allocate all low registers
30666 first. Saving and restoring a low register is usually cheaper than
30667 using a call-clobbered high register. */
30669 static const int thumb_core_reg_alloc_order
[] =
30671 3, 2, 1, 0, 4, 5, 6, 7,
30672 12, 14, 8, 9, 10, 11
30675 /* Adjust register allocation order when compiling for Thumb. */
30678 arm_order_regs_for_local_alloc (void)
30680 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
30681 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
30683 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
30684 sizeof (thumb_core_reg_alloc_order
));
30687 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30690 arm_frame_pointer_required (void)
30692 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
30695 /* If the function receives nonlocal gotos, it needs to save the frame
30696 pointer in the nonlocal_goto_save_area object. */
30697 if (cfun
->has_nonlocal_label
)
30700 /* The frame pointer is required for non-leaf APCS frames. */
30701 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
30704 /* If we are probing the stack in the prologue, we will have a faulting
30705 instruction prior to the stack adjustment and this requires a frame
30706 pointer if we want to catch the exception using the EABI unwinder. */
30707 if (!IS_INTERRUPT (arm_current_func_type ())
30708 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
30709 || flag_stack_clash_protection
)
30710 && arm_except_unwind_info (&global_options
) == UI_TARGET
30711 && cfun
->can_throw_non_call_exceptions
)
30713 HOST_WIDE_INT size
= get_frame_size ();
30715 /* That's irrelevant if there is no stack adjustment. */
30719 /* That's relevant only if there is a stack probe. */
30720 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
30722 /* We don't have the final size of the frame so adjust. */
30723 size
+= 32 * UNITS_PER_WORD
;
30724 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
30734 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30735 All modes except THUMB1 have conditional execution.
30736 If we have conditional arithmetic, return false before reload to
30737 enable some ifcvt transformations. */
30739 arm_have_conditional_execution (void)
30741 bool has_cond_exec
, enable_ifcvt_trans
;
30743 /* Only THUMB1 cannot support conditional execution. */
30744 has_cond_exec
= !TARGET_THUMB1
;
30746 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30748 enable_ifcvt_trans
= TARGET_COND_ARITH
&& !reload_completed
;
30750 return has_cond_exec
&& !enable_ifcvt_trans
;
30753 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30754 static HOST_WIDE_INT
30755 arm_vector_alignment (const_tree type
)
30757 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30759 if (TARGET_AAPCS_BASED
)
30760 align
= MIN (align
, 64);
30765 static unsigned int
30766 arm_autovectorize_vector_modes (vector_modes
*modes
, bool)
30768 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
30770 modes
->safe_push (V16QImode
);
30771 modes
->safe_push (V8QImode
);
30777 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30779 /* Vectors which aren't in packed structures will not be less aligned than
30780 the natural alignment of their element type, so this is safe. */
30781 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30784 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30788 arm_builtin_support_vector_misalignment (machine_mode mode
,
30789 const_tree type
, int misalignment
,
30792 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30794 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30799 /* If the misalignment is unknown, we should be able to handle the access
30800 so long as it is not to a member of a packed data structure. */
30801 if (misalignment
== -1)
30804 /* Return true if the misalignment is a multiple of the natural alignment
30805 of the vector's element type. This is probably always going to be
30806 true in practice, since we've already established that this isn't a
30808 return ((misalignment
% align
) == 0);
30811 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30816 arm_conditional_register_usage (void)
30820 if (TARGET_THUMB1
&& optimize_size
)
30822 /* When optimizing for size on Thumb-1, it's better not
30823 to use the HI regs, because of the overhead of
30825 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
30826 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30829 /* The link register can be clobbered by any branch insn,
30830 but we have no way to track that at present, so mark
30831 it as unavailable. */
30833 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30835 if (TARGET_32BIT
&& TARGET_VFP_BASE
)
30837 /* VFPv3 registers are disabled when earlier VFP
30838 versions are selected due to the definition of
30839 LAST_VFP_REGNUM. */
30840 for (regno
= FIRST_VFP_REGNUM
;
30841 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30843 fixed_regs
[regno
] = 0;
30844 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30845 || regno
>= FIRST_VFP_REGNUM
+ 32;
30847 if (TARGET_HAVE_MVE
)
30848 fixed_regs
[VPR_REGNUM
] = 0;
30851 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
30853 regno
= FIRST_IWMMXT_GR_REGNUM
;
30854 /* The 2002/10/09 revision of the XScale ABI has wCG0
30855 and wCG1 as call-preserved registers. The 2002/11/21
30856 revision changed this so that all wCG registers are
30857 scratch registers. */
30858 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30859 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30860 fixed_regs
[regno
] = 0;
30861 /* The XScale ABI has wR0 - wR9 as scratch registers,
30862 the rest as call-preserved registers. */
30863 for (regno
= FIRST_IWMMXT_REGNUM
;
30864 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30866 fixed_regs
[regno
] = 0;
30867 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30871 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30873 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30874 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30876 else if (TARGET_APCS_STACK
)
30878 fixed_regs
[10] = 1;
30879 call_used_regs
[10] = 1;
30881 /* -mcaller-super-interworking reserves r11 for calls to
30882 _interwork_r11_call_via_rN(). Making the register global
30883 is an easy way of ensuring that it remains valid for all
30885 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30886 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30888 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30889 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30890 if (TARGET_CALLER_INTERWORKING
)
30891 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30894 /* The Q and GE bits are only accessed via special ACLE patterns. */
30895 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRQ_REGNUM
);
30896 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRGE_REGNUM
);
30898 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30902 arm_preferred_rename_class (reg_class_t rclass
)
30904 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30905 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30906 and code size can be reduced. */
30907 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30913 /* Compute the attribute "length" of insn "*push_multi".
30914 So this function MUST be kept in sync with that insn pattern. */
30916 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30918 int i
, regno
, hi_reg
;
30919 int num_saves
= XVECLEN (parallel_op
, 0);
30929 regno
= REGNO (first_op
);
30930 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30931 list is 8-bit. Normally this means all registers in the list must be
30932 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
30933 encodings. There is one exception for PUSH that LR in HI_REGS can be used
30934 with 16-bit encoding. */
30935 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30936 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30938 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30939 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30947 /* Compute the attribute "length" of insn. Currently, this function is used
30948 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30949 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30950 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
30951 true if OPERANDS contains insn which explicit updates base register. */
30954 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
30963 rtx parallel_op
= operands
[0];
30964 /* Initialize to elements number of PARALLEL. */
30965 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
30966 /* Initialize the value to base register. */
30967 unsigned regno
= REGNO (operands
[1]);
30968 /* Skip return and write back pattern.
30969 We only need register pop pattern for later analysis. */
30970 unsigned first_indx
= 0;
30971 first_indx
+= return_pc
? 1 : 0;
30972 first_indx
+= write_back_p
? 1 : 0;
30974 /* A pop operation can be done through LDM or POP. If the base register is SP
30975 and if it's with write back, then a LDM will be alias of POP. */
30976 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
30977 bool ldm_p
= !pop_p
;
30979 /* Check base register for LDM. */
30980 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
30983 /* Check each register in the list. */
30984 for (; indx
>= first_indx
; indx
--)
30986 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
30987 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30988 comment in arm_attr_length_push_multi. */
30989 if (REGNO_REG_CLASS (regno
) == HI_REGS
30990 && (regno
!= PC_REGNUM
|| ldm_p
))
30997 /* Compute the number of instructions emitted by output_move_double. */
30999 arm_count_output_move_double_insns (rtx
*operands
)
31003 /* output_move_double may modify the operands array, so call it
31004 here on a copy of the array. */
31005 ops
[0] = operands
[0];
31006 ops
[1] = operands
[1];
31007 output_move_double (ops
, false, &count
);
31011 /* Same as above, but operands are a register/memory pair in SImode.
31012 Assumes operands has the base register in position 0 and memory in position
31013 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31015 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
31019 int regnum
, memnum
;
31021 regnum
= 0, memnum
= 1;
31023 regnum
= 1, memnum
= 0;
31024 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
31025 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
31026 output_move_double (ops
, false, &count
);
31032 vfp3_const_double_for_fract_bits (rtx operand
)
31034 REAL_VALUE_TYPE r0
;
31036 if (!CONST_DOUBLE_P (operand
))
31039 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
31040 if (exact_real_inverse (DFmode
, &r0
)
31041 && !REAL_VALUE_NEGATIVE (r0
))
31043 if (exact_real_truncate (DFmode
, &r0
))
31045 HOST_WIDE_INT value
= real_to_integer (&r0
);
31046 value
= value
& 0xffffffff;
31047 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
31049 int ret
= exact_log2 (value
);
31050 gcc_assert (IN_RANGE (ret
, 0, 31));
31058 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31059 log2 is in [1, 32], return that log2. Otherwise return -1.
31060 This is used in the patterns for vcvt.s32.f32 floating-point to
31061 fixed-point conversions. */
31064 vfp3_const_double_for_bits (rtx x
)
31066 const REAL_VALUE_TYPE
*r
;
31068 if (!CONST_DOUBLE_P (x
))
31071 r
= CONST_DOUBLE_REAL_VALUE (x
);
31073 if (REAL_VALUE_NEGATIVE (*r
)
31074 || REAL_VALUE_ISNAN (*r
)
31075 || REAL_VALUE_ISINF (*r
)
31076 || !real_isinteger (r
, SFmode
))
31079 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
31081 /* The exact_log2 above will have returned -1 if this is
31082 not an exact log2. */
31083 if (!IN_RANGE (hwint
, 1, 32))
31090 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31093 arm_pre_atomic_barrier (enum memmodel model
)
31095 if (need_atomic_barrier_p (model
, true))
31096 emit_insn (gen_memory_barrier ());
31100 arm_post_atomic_barrier (enum memmodel model
)
31102 if (need_atomic_barrier_p (model
, false))
31103 emit_insn (gen_memory_barrier ());
31106 /* Emit the load-exclusive and store-exclusive instructions.
31107 Use acquire and release versions if necessary. */
31110 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
31112 rtx (*gen
) (rtx
, rtx
);
31118 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
31119 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
31120 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
31121 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
31123 gcc_unreachable ();
31130 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
31131 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
31132 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
31133 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
31135 gcc_unreachable ();
31139 emit_insn (gen (rval
, mem
));
31143 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
31146 rtx (*gen
) (rtx
, rtx
, rtx
);
31152 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
31153 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
31154 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
31155 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
31157 gcc_unreachable ();
31164 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
31165 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
31166 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
31167 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
31169 gcc_unreachable ();
31173 emit_insn (gen (bval
, rval
, mem
));
31176 /* Mark the previous jump instruction as unlikely. */
31179 emit_unlikely_jump (rtx insn
)
31181 rtx_insn
*jump
= emit_jump_insn (insn
);
31182 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
31185 /* Expand a compare and swap pattern. */
31188 arm_expand_compare_and_swap (rtx operands
[])
31190 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
31191 machine_mode mode
, cmp_mode
;
31193 bval
= operands
[0];
31194 rval
= operands
[1];
31196 oldval
= operands
[3];
31197 newval
= operands
[4];
31198 is_weak
= operands
[5];
31199 mod_s
= operands
[6];
31200 mod_f
= operands
[7];
31201 mode
= GET_MODE (mem
);
31203 /* Normally the succ memory model must be stronger than fail, but in the
31204 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31205 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31207 if (TARGET_HAVE_LDACQ
31208 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
31209 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
31210 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
31216 /* For narrow modes, we're going to perform the comparison in SImode,
31217 so do the zero-extension now. */
31218 rval
= gen_reg_rtx (SImode
);
31219 oldval
= convert_modes (SImode
, mode
, oldval
, true);
31223 /* Force the value into a register if needed. We waited until after
31224 the zero-extension above to do this properly. */
31225 if (!arm_add_operand (oldval
, SImode
))
31226 oldval
= force_reg (SImode
, oldval
);
31230 if (!cmpdi_operand (oldval
, mode
))
31231 oldval
= force_reg (mode
, oldval
);
31235 gcc_unreachable ();
31239 cmp_mode
= E_SImode
;
31241 cmp_mode
= CC_Zmode
;
31243 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
31244 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
31245 oldval
, newval
, is_weak
, mod_s
, mod_f
));
31247 if (mode
== QImode
|| mode
== HImode
)
31248 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
31250 /* In all cases, we arrange for success to be signaled by Z set.
31251 This arrangement allows for the boolean result to be used directly
31252 in a subsequent branch, post optimization. For Thumb-1 targets, the
31253 boolean negation of the result is also stored in bval because Thumb-1
31254 backend lacks dependency tracking for CC flag due to flag-setting not
31255 being represented at RTL level. */
31257 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
31260 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
31261 emit_insn (gen_rtx_SET (bval
, x
));
31265 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31266 another memory store between the load-exclusive and store-exclusive can
31267 reset the monitor from Exclusive to Open state. This means we must wait
31268 until after reload to split the pattern, lest we get a register spill in
31269 the middle of the atomic sequence. Success of the compare and swap is
31270 indicated by the Z flag set for 32bit targets and by neg_bval being zero
31271 for Thumb-1 targets (ie. negation of the boolean value returned by
31272 atomic_compare_and_swapmode standard pattern in operand 0). */
31275 arm_split_compare_and_swap (rtx operands
[])
31277 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
31279 enum memmodel mod_s
, mod_f
;
31281 rtx_code_label
*label1
, *label2
;
31284 rval
= operands
[1];
31286 oldval
= operands
[3];
31287 newval
= operands
[4];
31288 is_weak
= (operands
[5] != const0_rtx
);
31289 mod_s_rtx
= operands
[6];
31290 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
31291 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
31292 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
31293 mode
= GET_MODE (mem
);
31295 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
31297 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
31298 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
31300 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31301 a full barrier is emitted after the store-release. */
31303 use_acquire
= false;
31305 /* Checks whether a barrier is needed and emits one accordingly. */
31306 if (!(use_acquire
|| use_release
))
31307 arm_pre_atomic_barrier (mod_s
);
31312 label1
= gen_label_rtx ();
31313 emit_label (label1
);
31315 label2
= gen_label_rtx ();
31317 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
31319 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31320 as required to communicate with arm_expand_compare_and_swap. */
31323 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
31324 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31325 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31326 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
31327 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31331 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
31332 if (thumb1_cmpneg_operand (oldval
, SImode
))
31335 if (!satisfies_constraint_L (oldval
))
31337 gcc_assert (satisfies_constraint_J (oldval
));
31339 /* For such immediates, ADDS needs the source and destination regs
31342 Normally this would be handled by RA, but this is all happening
31344 emit_move_insn (neg_bval
, rval
);
31348 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval
, src
, oldval
,
31353 emit_move_insn (neg_bval
, const1_rtx
);
31354 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
31358 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
31360 /* Weak or strong, we want EQ to be true for success, so that we
31361 match the flags that we got from the compare above. */
31364 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
31365 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
31366 emit_insn (gen_rtx_SET (cond
, x
));
31371 /* Z is set to boolean value of !neg_bval, as required to communicate
31372 with arm_expand_compare_and_swap. */
31373 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
31374 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
31377 if (!is_mm_relaxed (mod_f
))
31378 emit_label (label2
);
31380 /* Checks whether a barrier is needed and emits one accordingly. */
31382 || !(use_acquire
|| use_release
))
31383 arm_post_atomic_barrier (mod_s
);
31385 if (is_mm_relaxed (mod_f
))
31386 emit_label (label2
);
31389 /* Split an atomic operation pattern. Operation is given by CODE and is one
31390 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31391 operation). Operation is performed on the content at MEM and on VALUE
31392 following the memory model MODEL_RTX. The content at MEM before and after
31393 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31394 success of the operation is returned in COND. Using a scratch register or
31395 an operand register for these determines what result is returned for that
31399 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
31400 rtx value
, rtx model_rtx
, rtx cond
)
31402 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
31403 machine_mode mode
= GET_MODE (mem
);
31404 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
31405 rtx_code_label
*label
;
31406 bool all_low_regs
, bind_old_new
;
31409 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
31411 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
31412 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
31414 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31415 a full barrier is emitted after the store-release. */
31417 use_acquire
= false;
31419 /* Checks whether a barrier is needed and emits one accordingly. */
31420 if (!(use_acquire
|| use_release
))
31421 arm_pre_atomic_barrier (model
);
31423 label
= gen_label_rtx ();
31424 emit_label (label
);
31427 new_out
= gen_lowpart (wmode
, new_out
);
31429 old_out
= gen_lowpart (wmode
, old_out
);
31432 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
31434 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
31436 /* Does the operation require destination and first operand to use the same
31437 register? This is decided by register constraints of relevant insn
31438 patterns in thumb1.md. */
31439 gcc_assert (!new_out
|| REG_P (new_out
));
31440 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
31441 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
31442 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
31447 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
31449 /* We want to return the old value while putting the result of the operation
31450 in the same register as the old value so copy the old value over to the
31451 destination register and use that register for the operation. */
31452 if (old_out
&& bind_old_new
)
31454 emit_move_insn (new_out
, old_out
);
31465 x
= gen_rtx_AND (wmode
, old_out
, value
);
31466 emit_insn (gen_rtx_SET (new_out
, x
));
31467 x
= gen_rtx_NOT (wmode
, new_out
);
31468 emit_insn (gen_rtx_SET (new_out
, x
));
31472 if (CONST_INT_P (value
))
31474 value
= gen_int_mode (-INTVAL (value
), wmode
);
31480 if (mode
== DImode
)
31482 /* DImode plus/minus need to clobber flags. */
31483 /* The adddi3 and subdi3 patterns are incorrectly written so that
31484 they require matching operands, even when we could easily support
31485 three operands. Thankfully, this can be fixed up post-splitting,
31486 as the individual add+adc patterns do accept three operands and
31487 post-reload cprop can make these moves go away. */
31488 emit_move_insn (new_out
, old_out
);
31490 x
= gen_adddi3 (new_out
, new_out
, value
);
31492 x
= gen_subdi3 (new_out
, new_out
, value
);
31499 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
31500 emit_insn (gen_rtx_SET (new_out
, x
));
31504 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
31507 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31508 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
31510 /* Checks whether a barrier is needed and emits one accordingly. */
31512 || !(use_acquire
|| use_release
))
31513 arm_post_atomic_barrier (model
);
31516 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31518 arm_mode_to_pred_mode (machine_mode mode
)
31520 switch (GET_MODE_NUNITS (mode
))
31522 case 16: return V16BImode
;
31523 case 8: return V8BImode
;
31524 case 4: return V4BImode
;
31525 case 2: return V2QImode
;
31527 return opt_machine_mode ();
31530 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31531 If CAN_INVERT, store either the result or its inverse in TARGET
31532 and return true if TARGET contains the inverse. If !CAN_INVERT,
31533 always store the result in TARGET, never its inverse.
31535 Note that the handling of floating-point comparisons is not
31539 arm_expand_vector_compare (rtx target
, rtx_code code
, rtx op0
, rtx op1
,
31542 machine_mode cmp_result_mode
= GET_MODE (target
);
31543 machine_mode cmp_mode
= GET_MODE (op0
);
31547 /* MVE supports more comparisons than Neon. */
31548 if (TARGET_HAVE_MVE
)
31553 /* For these we need to compute the inverse of the requested
31562 code
= reverse_condition_maybe_unordered (code
);
31565 /* Recursively emit the inverted comparison into a temporary
31566 and then store its inverse in TARGET. This avoids reusing
31567 TARGET (which for integer NE could be one of the inputs). */
31568 rtx tmp
= gen_reg_rtx (cmp_result_mode
);
31569 if (arm_expand_vector_compare (tmp
, code
, op0
, op1
, true))
31570 gcc_unreachable ();
31571 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (cmp_result_mode
, tmp
)));
31584 /* These are natively supported by Neon for zero comparisons, but otherwise
31585 require the operands to be swapped. For MVE, we can only compare
31589 if (!TARGET_HAVE_MVE
)
31590 if (op1
!= CONST0_RTX (cmp_mode
))
31592 code
= swap_condition (code
);
31593 std::swap (op0
, op1
);
31595 /* Fall through. */
31597 /* These are natively supported by Neon for both register and zero
31598 operands. MVE supports registers only. */
31603 if (TARGET_HAVE_MVE
)
31605 switch (GET_MODE_CLASS (cmp_mode
))
31607 case MODE_VECTOR_INT
:
31608 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31609 op0
, force_reg (cmp_mode
, op1
)));
31611 case MODE_VECTOR_FLOAT
:
31612 if (TARGET_HAVE_MVE_FLOAT
)
31613 emit_insn (gen_mve_vcmpq_f (code
, cmp_mode
, target
,
31614 op0
, force_reg (cmp_mode
, op1
)));
31616 gcc_unreachable ();
31619 gcc_unreachable ();
31623 emit_insn (gen_neon_vc (code
, cmp_mode
, target
, op0
, op1
));
31626 /* These are natively supported for register operands only.
31627 Comparisons with zero aren't useful and should be folded
31628 or canonicalized by target-independent code. */
31631 if (TARGET_HAVE_MVE
)
31632 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31633 op0
, force_reg (cmp_mode
, op1
)));
31635 emit_insn (gen_neon_vc (code
, cmp_mode
, target
,
31636 op0
, force_reg (cmp_mode
, op1
)));
31639 /* These require the operands to be swapped and likewise do not
31640 support comparisons with zero. */
31643 if (TARGET_HAVE_MVE
)
31644 emit_insn (gen_mve_vcmpq (swap_condition (code
), cmp_mode
, target
,
31645 force_reg (cmp_mode
, op1
), op0
));
31647 emit_insn (gen_neon_vc (swap_condition (code
), cmp_mode
,
31648 target
, force_reg (cmp_mode
, op1
), op0
));
31651 /* These need a combination of two comparisons. */
31655 /* Operands are LTGT iff (a > b || a > b).
31656 Operands are ORDERED iff (a > b || a <= b). */
31657 rtx gt_res
= gen_reg_rtx (cmp_result_mode
);
31658 rtx alt_res
= gen_reg_rtx (cmp_result_mode
);
31659 rtx_code alt_code
= (code
== LTGT
? LT
: LE
);
31660 if (arm_expand_vector_compare (gt_res
, GT
, op0
, op1
, true)
31661 || arm_expand_vector_compare (alt_res
, alt_code
, op0
, op1
, true))
31662 gcc_unreachable ();
31663 emit_insn (gen_rtx_SET (target
, gen_rtx_IOR (cmp_result_mode
,
31664 gt_res
, alt_res
)));
31669 gcc_unreachable ();
31673 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31674 CMP_RESULT_MODE is the mode of the comparison result. */
31677 arm_expand_vcond (rtx
*operands
, machine_mode cmp_result_mode
)
31679 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31680 arm_expand_vector_compare, and another one here. */
31683 if (TARGET_HAVE_MVE
)
31684 mask
= gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode
).require ());
31686 mask
= gen_reg_rtx (cmp_result_mode
);
31688 bool inverted
= arm_expand_vector_compare (mask
, GET_CODE (operands
[3]),
31689 operands
[4], operands
[5], true);
31691 std::swap (operands
[1], operands
[2]);
31693 emit_insn (gen_neon_vbsl (GET_MODE (operands
[0]), operands
[0],
31694 mask
, operands
[1], operands
[2]));
31697 machine_mode cmp_mode
= GET_MODE (operands
[0]);
31699 switch (GET_MODE_CLASS (cmp_mode
))
31701 case MODE_VECTOR_INT
:
31702 emit_insn (gen_mve_q (VPSELQ_S
, VPSELQ_S
, cmp_mode
, operands
[0],
31703 operands
[1], operands
[2], mask
));
31705 case MODE_VECTOR_FLOAT
:
31706 if (TARGET_HAVE_MVE_FLOAT
)
31707 emit_insn (gen_mve_q_f (VPSELQ_F
, cmp_mode
, operands
[0],
31708 operands
[1], operands
[2], mask
));
31710 gcc_unreachable ();
31713 gcc_unreachable ();
31718 #define MAX_VECT_LEN 16
31720 struct expand_vec_perm_d
31722 rtx target
, op0
, op1
;
31723 vec_perm_indices perm
;
31724 machine_mode vmode
;
31729 /* Generate a variable permutation. */
31732 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31734 machine_mode vmode
= GET_MODE (target
);
31735 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31737 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
31738 gcc_checking_assert (GET_MODE (op0
) == vmode
);
31739 gcc_checking_assert (GET_MODE (op1
) == vmode
);
31740 gcc_checking_assert (GET_MODE (sel
) == vmode
);
31741 gcc_checking_assert (TARGET_NEON
);
31745 if (vmode
== V8QImode
)
31746 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
31748 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
31754 if (vmode
== V8QImode
)
31756 pair
= gen_reg_rtx (V16QImode
);
31757 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
31758 pair
= gen_lowpart (TImode
, pair
);
31759 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
31763 pair
= gen_reg_rtx (OImode
);
31764 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
31765 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
31771 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31773 machine_mode vmode
= GET_MODE (target
);
31774 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
31775 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31778 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31779 numbering of elements for big-endian, we must reverse the order. */
31780 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
31782 /* The VTBL instruction does not use a modulo index, so we must take care
31783 of that ourselves. */
31784 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31785 mask
= gen_const_vec_duplicate (vmode
, mask
);
31786 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
31788 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
31791 /* Map lane ordering between architectural lane order, and GCC lane order,
31792 taking into account ABI. See comment above output_move_neon for details. */
31795 neon_endian_lane_map (machine_mode mode
, int lane
)
31797 if (BYTES_BIG_ENDIAN
)
31799 int nelems
= GET_MODE_NUNITS (mode
);
31800 /* Reverse lane order. */
31801 lane
= (nelems
- 1 - lane
);
31802 /* Reverse D register order, to match ABI. */
31803 if (GET_MODE_SIZE (mode
) == 16)
31804 lane
= lane
^ (nelems
/ 2);
31809 /* Some permutations index into pairs of vectors, this is a helper function
31810 to map indexes into those pairs of vectors. */
31813 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
31815 int nelem
= GET_MODE_NUNITS (mode
);
31816 if (BYTES_BIG_ENDIAN
)
31818 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
31822 /* Generate or test for an insn that supports a constant permutation. */
31824 /* Recognize patterns for the VUZP insns. */
31827 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
31829 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
31830 rtx out0
, out1
, in0
, in1
;
31834 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31837 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31838 big endian pattern on 64 bit vectors, so we correct for that. */
31839 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
31840 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
31842 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
31844 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31846 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
31850 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31852 for (i
= 0; i
< nelt
; i
++)
31855 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
31856 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
31866 if (swap_nelt
!= 0)
31867 std::swap (in0
, in1
);
31870 out1
= gen_reg_rtx (d
->vmode
);
31872 std::swap (out0
, out1
);
31874 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31878 /* Recognize patterns for the VZIP insns. */
31881 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
31883 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
31884 rtx out0
, out1
, in0
, in1
;
31888 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31891 is_swapped
= BYTES_BIG_ENDIAN
;
31893 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
31896 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
31898 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31902 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31904 for (i
= 0; i
< nelt
/ 2; i
++)
31907 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
31908 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
31912 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
31913 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
31925 std::swap (in0
, in1
);
31928 out1
= gen_reg_rtx (d
->vmode
);
31930 std::swap (out0
, out1
);
31932 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31936 /* Recognize patterns for the VREV insns. */
31938 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
31940 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
31941 rtx (*gen
) (machine_mode
, rtx
, rtx
);
31943 if (!d
->one_vector_p
)
31954 gen
= gen_neon_vrev64
;
31965 gen
= gen_neon_vrev32
;
31971 gen
= gen_neon_vrev64
;
31982 gen
= gen_neon_vrev16
;
31986 gen
= gen_neon_vrev32
;
31992 gen
= gen_neon_vrev64
;
32002 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
32003 for (j
= 0; j
<= diff
; j
+= 1)
32005 /* This is guaranteed to be true as the value of diff
32006 is 7, 3, 1 and we should have enough elements in the
32007 queue to generate this. Getting a vector mask with a
32008 value of diff other than these values implies that
32009 something is wrong by the time we get here. */
32010 gcc_assert (i
+ j
< nelt
);
32011 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
32019 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
32023 /* Recognize patterns for the VTRN insns. */
32026 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
32028 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
32029 rtx out0
, out1
, in0
, in1
;
32031 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
32034 /* Note that these are little-endian tests. Adjust for big-endian later. */
32035 if (d
->perm
[0] == 0)
32037 else if (d
->perm
[0] == 1)
32041 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
32043 for (i
= 0; i
< nelt
; i
+= 2)
32045 if (d
->perm
[i
] != i
+ odd
)
32047 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
32057 if (BYTES_BIG_ENDIAN
)
32059 std::swap (in0
, in1
);
32064 out1
= gen_reg_rtx (d
->vmode
);
32066 std::swap (out0
, out1
);
32068 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
32072 /* Recognize patterns for the VEXT insns. */
32075 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
32077 unsigned int i
, nelt
= d
->perm
.length ();
32080 unsigned int location
;
32082 unsigned int next
= d
->perm
[0] + 1;
32084 /* TODO: Handle GCC's numbering of elements for big-endian. */
32085 if (BYTES_BIG_ENDIAN
)
32088 /* Check if the extracted indexes are increasing by one. */
32089 for (i
= 1; i
< nelt
; next
++, i
++)
32091 /* If we hit the most significant element of the 2nd vector in
32092 the previous iteration, no need to test further. */
32093 if (next
== 2 * nelt
)
32096 /* If we are operating on only one vector: it could be a
32097 rotation. If there are only two elements of size < 64, let
32098 arm_evpc_neon_vrev catch it. */
32099 if (d
->one_vector_p
&& (next
== nelt
))
32101 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
32107 if (d
->perm
[i
] != next
)
32111 location
= d
->perm
[0];
32117 offset
= GEN_INT (location
);
32119 if(d
->vmode
== E_DImode
)
32122 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
32126 /* The NEON VTBL instruction is a fully variable permuation that's even
32127 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32128 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32129 can do slightly better by expanding this as a constant where we don't
32130 have to apply a mask. */
32133 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
32135 rtx rperm
[MAX_VECT_LEN
], sel
;
32136 machine_mode vmode
= d
->vmode
;
32137 unsigned int i
, nelt
= d
->perm
.length ();
32139 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32140 numbering of elements for big-endian, we must reverse the order. */
32141 if (BYTES_BIG_ENDIAN
)
32147 /* Generic code will try constant permutation twice. Once with the
32148 original mode and again with the elements lowered to QImode.
32149 So wait and don't do the selector expansion ourselves. */
32150 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
32153 for (i
= 0; i
< nelt
; ++i
)
32154 rperm
[i
] = GEN_INT (d
->perm
[i
]);
32155 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
32156 sel
= force_reg (vmode
, sel
);
32158 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
32163 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
32165 /* Check if the input mask matches vext before reordering the
32168 if (arm_evpc_neon_vext (d
))
32171 /* The pattern matching functions above are written to look for a small
32172 number to begin the sequence (0, 1, N/2). If we begin with an index
32173 from the second operand, we can swap the operands. */
32174 unsigned int nelt
= d
->perm
.length ();
32175 if (d
->perm
[0] >= nelt
)
32177 d
->perm
.rotate_inputs (1);
32178 std::swap (d
->op0
, d
->op1
);
32183 if (arm_evpc_neon_vuzp (d
))
32185 if (arm_evpc_neon_vzip (d
))
32187 if (arm_evpc_neon_vrev (d
))
32189 if (arm_evpc_neon_vtrn (d
))
32191 return arm_evpc_neon_vtbl (d
);
32196 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32199 arm_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
32200 rtx target
, rtx op0
, rtx op1
,
32201 const vec_perm_indices
&sel
)
32203 if (vmode
!= op_mode
)
32206 struct expand_vec_perm_d d
;
32207 int i
, nelt
, which
;
32209 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
32215 rtx nop0
= force_reg (vmode
, op0
);
32221 op1
= force_reg (vmode
, op1
);
32226 gcc_assert (VECTOR_MODE_P (d
.vmode
));
32227 d
.testing_p
= !target
;
32229 nelt
= GET_MODE_NUNITS (d
.vmode
);
32230 for (i
= which
= 0; i
< nelt
; ++i
)
32232 int ei
= sel
[i
] & (2 * nelt
- 1);
32233 which
|= (ei
< nelt
? 1 : 2);
32242 d
.one_vector_p
= false;
32243 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
32246 /* The elements of PERM do not suggest that only the first operand
32247 is used, but both operands are identical. Allow easier matching
32248 of the permutation by folding the permutation into the single
32253 d
.one_vector_p
= true;
32258 d
.one_vector_p
= true;
32262 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
32265 return arm_expand_vec_perm_const_1 (&d
);
32267 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
32268 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
32269 if (!d
.one_vector_p
)
32270 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
32273 bool ret
= arm_expand_vec_perm_const_1 (&d
);
32280 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
32282 /* If we are soft float and we do not have ldrd
32283 then all auto increment forms are ok. */
32284 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
32289 /* Post increment and Pre Decrement are supported for all
32290 instruction forms except for vector forms. */
32293 if (VECTOR_MODE_P (mode
))
32295 if (code
!= ARM_PRE_DEC
)
32305 /* Without LDRD and mode size greater than
32306 word size, there is no point in auto-incrementing
32307 because ldm and stm will not have these forms. */
32308 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
32311 /* Vector and floating point modes do not support
32312 these auto increment forms. */
32313 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
32326 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32327 on ARM, since we know that shifts by negative amounts are no-ops.
32328 Additionally, the default expansion code is not available or suitable
32329 for post-reload insn splits (this can occur when the register allocator
32330 chooses not to do a shift in NEON).
32332 This function is used in both initial expand and post-reload splits, and
32333 handles all kinds of 64-bit shifts.
32335 Input requirements:
32336 - It is safe for the input and output to be the same register, but
32337 early-clobber rules apply for the shift amount and scratch registers.
32338 - Shift by register requires both scratch registers. In all other cases
32339 the scratch registers may be NULL.
32340 - Ashiftrt by a register also clobbers the CC register. */
32342 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
32343 rtx amount
, rtx scratch1
, rtx scratch2
)
32345 rtx out_high
= gen_highpart (SImode
, out
);
32346 rtx out_low
= gen_lowpart (SImode
, out
);
32347 rtx in_high
= gen_highpart (SImode
, in
);
32348 rtx in_low
= gen_lowpart (SImode
, in
);
32351 in = the register pair containing the input value.
32352 out = the destination register pair.
32353 up = the high- or low-part of each pair.
32354 down = the opposite part to "up".
32355 In a shift, we can consider bits to shift from "up"-stream to
32356 "down"-stream, so in a left-shift "up" is the low-part and "down"
32357 is the high-part of each register pair. */
32359 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
32360 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
32361 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
32362 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
32364 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
32366 && (REG_P (out
) || SUBREG_P (out
))
32367 && GET_MODE (out
) == DImode
);
32369 && (REG_P (in
) || SUBREG_P (in
))
32370 && GET_MODE (in
) == DImode
);
32372 && (((REG_P (amount
) || SUBREG_P (amount
))
32373 && GET_MODE (amount
) == SImode
)
32374 || CONST_INT_P (amount
)));
32375 gcc_assert (scratch1
== NULL
32376 || (GET_CODE (scratch1
) == SCRATCH
)
32377 || (GET_MODE (scratch1
) == SImode
32378 && REG_P (scratch1
)));
32379 gcc_assert (scratch2
== NULL
32380 || (GET_CODE (scratch2
) == SCRATCH
)
32381 || (GET_MODE (scratch2
) == SImode
32382 && REG_P (scratch2
)));
32383 gcc_assert (!REG_P (out
) || !REG_P (amount
)
32384 || !HARD_REGISTER_P (out
)
32385 || (REGNO (out
) != REGNO (amount
)
32386 && REGNO (out
) + 1 != REGNO (amount
)));
32388 /* Macros to make following code more readable. */
32389 #define SUB_32(DEST,SRC) \
32390 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32391 #define RSB_32(DEST,SRC) \
32392 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32393 #define SUB_S_32(DEST,SRC) \
32394 gen_addsi3_compare0 ((DEST), (SRC), \
32396 #define SET(DEST,SRC) \
32397 gen_rtx_SET ((DEST), (SRC))
32398 #define SHIFT(CODE,SRC,AMOUNT) \
32399 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32400 #define LSHIFT(CODE,SRC,AMOUNT) \
32401 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32402 SImode, (SRC), (AMOUNT))
32403 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32404 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32405 SImode, (SRC), (AMOUNT))
32407 gen_rtx_IOR (SImode, (A), (B))
32408 #define BRANCH(COND,LABEL) \
32409 gen_arm_cond_branch ((LABEL), \
32410 gen_rtx_ ## COND (CCmode, cc_reg, \
32414 /* Shifts by register and shifts by constant are handled separately. */
32415 if (CONST_INT_P (amount
))
32417 /* We have a shift-by-constant. */
32419 /* First, handle out-of-range shift amounts.
32420 In both cases we try to match the result an ARM instruction in a
32421 shift-by-register would give. This helps reduce execution
32422 differences between optimization levels, but it won't stop other
32423 parts of the compiler doing different things. This is "undefined
32424 behavior, in any case. */
32425 if (INTVAL (amount
) <= 0)
32426 emit_insn (gen_movdi (out
, in
));
32427 else if (INTVAL (amount
) >= 64)
32429 if (code
== ASHIFTRT
)
32431 rtx const31_rtx
= GEN_INT (31);
32432 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
32433 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
32436 emit_insn (gen_movdi (out
, const0_rtx
));
32439 /* Now handle valid shifts. */
32440 else if (INTVAL (amount
) < 32)
32442 /* Shifts by a constant less than 32. */
32443 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
32445 /* Clearing the out register in DImode first avoids lots
32446 of spilling and results in less stack usage.
32447 Later this redundant insn is completely removed.
32448 Do that only if "in" and "out" are different registers. */
32449 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32450 emit_insn (SET (out
, const0_rtx
));
32451 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32452 emit_insn (SET (out_down
,
32453 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
32455 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32459 /* Shifts by a constant greater than 31. */
32460 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
32462 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32463 emit_insn (SET (out
, const0_rtx
));
32464 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
32465 if (code
== ASHIFTRT
)
32466 emit_insn (gen_ashrsi3 (out_up
, in_up
,
32469 emit_insn (SET (out_up
, const0_rtx
));
32474 /* We have a shift-by-register. */
32475 rtx cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
32477 /* This alternative requires the scratch registers. */
32478 gcc_assert (scratch1
&& REG_P (scratch1
));
32479 gcc_assert (scratch2
&& REG_P (scratch2
));
32481 /* We will need the values "amount-32" and "32-amount" later.
32482 Swapping them around now allows the later code to be more general. */
32486 emit_insn (SUB_32 (scratch1
, amount
));
32487 emit_insn (RSB_32 (scratch2
, amount
));
32490 emit_insn (RSB_32 (scratch1
, amount
));
32491 /* Also set CC = amount > 32. */
32492 emit_insn (SUB_S_32 (scratch2
, amount
));
32495 emit_insn (RSB_32 (scratch1
, amount
));
32496 emit_insn (SUB_32 (scratch2
, amount
));
32499 gcc_unreachable ();
32502 /* Emit code like this:
32505 out_down = in_down << amount;
32506 out_down = (in_up << (amount - 32)) | out_down;
32507 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32508 out_up = in_up << amount;
32511 out_down = in_down >> amount;
32512 out_down = (in_up << (32 - amount)) | out_down;
32514 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32515 out_up = in_up << amount;
32518 out_down = in_down >> amount;
32519 out_down = (in_up << (32 - amount)) | out_down;
32521 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32522 out_up = in_up << amount;
32524 The ARM and Thumb2 variants are the same but implemented slightly
32525 differently. If this were only called during expand we could just
32526 use the Thumb2 case and let combine do the right thing, but this
32527 can also be called from post-reload splitters. */
32529 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32531 if (!TARGET_THUMB2
)
32533 /* Emit code for ARM mode. */
32534 emit_insn (SET (out_down
,
32535 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
32536 if (code
== ASHIFTRT
)
32538 rtx_code_label
*done_label
= gen_label_rtx ();
32539 emit_jump_insn (BRANCH (LT
, done_label
));
32540 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
32542 emit_label (done_label
);
32545 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
32550 /* Emit code for Thumb2 mode.
32551 Thumb2 can't do shift and or in one insn. */
32552 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
32553 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
32555 if (code
== ASHIFTRT
)
32557 rtx_code_label
*done_label
= gen_label_rtx ();
32558 emit_jump_insn (BRANCH (LT
, done_label
));
32559 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
32560 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
32561 emit_label (done_label
);
32565 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
32566 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
32570 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32584 /* Returns true if the pattern is a valid symbolic address, which is either a
32585 symbol_ref or (symbol_ref + addend).
32587 According to the ARM ELF ABI, the initial addend of REL-type relocations
32588 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32589 literal field of the instruction as a 16-bit signed value in the range
32590 -32768 <= A < 32768.
32592 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32593 unsigned range of 0 <= A < 256 as described in the AAELF32
32594 relocation handling documentation: REL-type relocations are encoded
32595 as unsigned in this case. */
32598 arm_valid_symbolic_address_p (rtx addr
)
32600 rtx xop0
, xop1
= NULL_RTX
;
32603 if (target_word_relocations
)
32606 if (SYMBOL_REF_P (tmp
) || LABEL_REF_P (tmp
))
32609 /* (const (plus: symbol_ref const_int)) */
32610 if (GET_CODE (addr
) == CONST
)
32611 tmp
= XEXP (addr
, 0);
32613 if (GET_CODE (tmp
) == PLUS
)
32615 xop0
= XEXP (tmp
, 0);
32616 xop1
= XEXP (tmp
, 1);
32618 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
32620 if (TARGET_THUMB1
&& !TARGET_HAVE_MOVT
)
32621 return IN_RANGE (INTVAL (xop1
), 0, 0xff);
32623 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
32630 /* Returns true if a valid comparison operation and makes
32631 the operands in a form that is valid. */
32633 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
32635 enum rtx_code code
= GET_CODE (*comparison
);
32637 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
32638 ? GET_MODE (*op2
) : GET_MODE (*op1
);
32640 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
32642 if (code
== UNEQ
|| code
== LTGT
)
32645 code_int
= (int)code
;
32646 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
32647 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
32652 if (!arm_add_operand (*op1
, mode
))
32653 *op1
= force_reg (mode
, *op1
);
32654 if (!arm_add_operand (*op2
, mode
))
32655 *op2
= force_reg (mode
, *op2
);
32659 /* gen_compare_reg() will sort out any invalid operands. */
32663 if (!TARGET_VFP_FP16INST
)
32665 /* FP16 comparisons are done in SF mode. */
32667 *op1
= convert_to_mode (mode
, *op1
, 1);
32668 *op2
= convert_to_mode (mode
, *op2
, 1);
32669 /* Fall through. */
32672 if (!vfp_compare_operand (*op1
, mode
))
32673 *op1
= force_reg (mode
, *op1
);
32674 if (!vfp_compare_operand (*op2
, mode
))
32675 *op2
= force_reg (mode
, *op2
);
32685 /* Maximum number of instructions to set block of memory. */
32687 arm_block_set_max_insns (void)
32689 if (optimize_function_for_size_p (cfun
))
32692 return current_tune
->max_insns_inline_memset
;
32695 /* Return TRUE if it's profitable to set block of memory for
32696 non-vectorized case. VAL is the value to set the memory
32697 with. LENGTH is the number of bytes to set. ALIGN is the
32698 alignment of the destination memory in bytes. UNALIGNED_P
32699 is TRUE if we can only set the memory with instructions
32700 meeting alignment requirements. USE_STRD_P is TRUE if we
32701 can use strd to set the memory. */
32703 arm_block_set_non_vect_profit_p (rtx val
,
32704 unsigned HOST_WIDE_INT length
,
32705 unsigned HOST_WIDE_INT align
,
32706 bool unaligned_p
, bool use_strd_p
)
32709 /* For leftovers in bytes of 0-7, we can set the memory block using
32710 strb/strh/str with minimum instruction number. */
32711 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32715 num
= arm_const_inline_cost (SET
, val
);
32716 num
+= length
/ align
+ length
% align
;
32718 else if (use_strd_p
)
32720 num
= arm_const_double_inline_cost (val
);
32721 num
+= (length
>> 3) + leftover
[length
& 7];
32725 num
= arm_const_inline_cost (SET
, val
);
32726 num
+= (length
>> 2) + leftover
[length
& 3];
32729 /* We may be able to combine last pair STRH/STRB into a single STR
32730 by shifting one byte back. */
32731 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
32734 return (num
<= arm_block_set_max_insns ());
32737 /* Return TRUE if it's profitable to set block of memory for
32738 vectorized case. LENGTH is the number of bytes to set.
32739 ALIGN is the alignment of destination memory in bytes.
32740 MODE is the vector mode used to set the memory. */
32742 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
32743 unsigned HOST_WIDE_INT align
,
32747 bool unaligned_p
= ((align
& 3) != 0);
32748 unsigned int nelt
= GET_MODE_NUNITS (mode
);
32750 /* Instruction loading constant value. */
32752 /* Instructions storing the memory. */
32753 num
+= (length
+ nelt
- 1) / nelt
;
32754 /* Instructions adjusting the address expression. Only need to
32755 adjust address expression if it's 4 bytes aligned and bytes
32756 leftover can only be stored by mis-aligned store instruction. */
32757 if (!unaligned_p
&& (length
& 3) != 0)
32760 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32761 if (!unaligned_p
&& mode
== V16QImode
)
32764 return (num
<= arm_block_set_max_insns ());
32767 /* Set a block of memory using vectorization instructions for the
32768 unaligned case. We fill the first LENGTH bytes of the memory
32769 area starting from DSTBASE with byte constant VALUE. ALIGN is
32770 the alignment requirement of memory. Return TRUE if succeeded. */
32772 arm_block_set_unaligned_vect (rtx dstbase
,
32773 unsigned HOST_WIDE_INT length
,
32774 unsigned HOST_WIDE_INT value
,
32775 unsigned HOST_WIDE_INT align
)
32777 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
32780 rtx (*gen_func
) (rtx
, rtx
);
32782 unsigned HOST_WIDE_INT v
= value
;
32783 unsigned int offset
= 0;
32784 gcc_assert ((align
& 0x3) != 0);
32785 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32786 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32787 if (length
>= nelt_v16
)
32790 gen_func
= gen_movmisalignv16qi
;
32795 gen_func
= gen_movmisalignv8qi
;
32797 nelt_mode
= GET_MODE_NUNITS (mode
);
32798 gcc_assert (length
>= nelt_mode
);
32799 /* Skip if it isn't profitable. */
32800 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32803 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32804 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32806 v
= sext_hwi (v
, BITS_PER_WORD
);
32808 reg
= gen_reg_rtx (mode
);
32809 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
32810 /* Emit instruction loading the constant value. */
32811 emit_move_insn (reg
, val_vec
);
32813 /* Handle nelt_mode bytes in a vector. */
32814 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32816 emit_insn ((*gen_func
) (mem
, reg
));
32817 if (i
+ 2 * nelt_mode
<= length
)
32819 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
32820 offset
+= nelt_mode
;
32821 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32825 /* If there are not less than nelt_v8 bytes leftover, we must be in
32827 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
32829 /* Handle (8, 16) bytes leftover. */
32830 if (i
+ nelt_v8
< length
)
32832 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
32833 offset
+= length
- i
;
32834 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32836 /* We are shifting bytes back, set the alignment accordingly. */
32837 if ((length
& 1) != 0 && align
>= 2)
32838 set_mem_align (mem
, BITS_PER_UNIT
);
32840 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32842 /* Handle (0, 8] bytes leftover. */
32843 else if (i
< length
&& i
+ nelt_v8
>= length
)
32845 if (mode
== V16QImode
)
32846 reg
= gen_lowpart (V8QImode
, reg
);
32848 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
32849 + (nelt_mode
- nelt_v8
))));
32850 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
32851 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
32853 /* We are shifting bytes back, set the alignment accordingly. */
32854 if ((length
& 1) != 0 && align
>= 2)
32855 set_mem_align (mem
, BITS_PER_UNIT
);
32857 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32863 /* Set a block of memory using vectorization instructions for the
32864 aligned case. We fill the first LENGTH bytes of the memory area
32865 starting from DSTBASE with byte constant VALUE. ALIGN is the
32866 alignment requirement of memory. Return TRUE if succeeded. */
32868 arm_block_set_aligned_vect (rtx dstbase
,
32869 unsigned HOST_WIDE_INT length
,
32870 unsigned HOST_WIDE_INT value
,
32871 unsigned HOST_WIDE_INT align
)
32873 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
32874 rtx dst
, addr
, mem
;
32877 unsigned int offset
= 0;
32879 gcc_assert ((align
& 0x3) == 0);
32880 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32881 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32882 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
32887 nelt_mode
= GET_MODE_NUNITS (mode
);
32888 gcc_assert (length
>= nelt_mode
);
32889 /* Skip if it isn't profitable. */
32890 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32893 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32895 reg
= gen_reg_rtx (mode
);
32896 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
32897 /* Emit instruction loading the constant value. */
32898 emit_move_insn (reg
, val_vec
);
32901 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32902 if (mode
== V16QImode
)
32904 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32905 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32907 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32908 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
32910 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32911 offset
+= length
- nelt_mode
;
32912 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32913 /* We are shifting bytes back, set the alignment accordingly. */
32914 if ((length
& 0x3) == 0)
32915 set_mem_align (mem
, BITS_PER_UNIT
* 4);
32916 else if ((length
& 0x1) == 0)
32917 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32919 set_mem_align (mem
, BITS_PER_UNIT
);
32921 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32924 /* Fall through for bytes leftover. */
32926 nelt_mode
= GET_MODE_NUNITS (mode
);
32927 reg
= gen_lowpart (V8QImode
, reg
);
32930 /* Handle 8 bytes in a vector. */
32931 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32933 addr
= plus_constant (Pmode
, dst
, i
);
32934 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
32935 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
32936 emit_move_insn (mem
, reg
);
32938 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32941 /* Handle single word leftover by shifting 4 bytes back. We can
32942 use aligned access for this case. */
32943 if (i
+ UNITS_PER_WORD
== length
)
32945 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
32946 offset
+= i
- UNITS_PER_WORD
;
32947 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
32948 /* We are shifting 4 bytes back, set the alignment accordingly. */
32949 if (align
> UNITS_PER_WORD
)
32950 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
32952 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32954 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32955 We have to use unaligned access for this case. */
32956 else if (i
< length
)
32958 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32959 offset
+= length
- nelt_mode
;
32960 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32961 /* We are shifting bytes back, set the alignment accordingly. */
32962 if ((length
& 1) == 0)
32963 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32965 set_mem_align (mem
, BITS_PER_UNIT
);
32967 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32973 /* Set a block of memory using plain strh/strb instructions, only
32974 using instructions allowed by ALIGN on processor. We fill the
32975 first LENGTH bytes of the memory area starting from DSTBASE
32976 with byte constant VALUE. ALIGN is the alignment requirement
32979 arm_block_set_unaligned_non_vect (rtx dstbase
,
32980 unsigned HOST_WIDE_INT length
,
32981 unsigned HOST_WIDE_INT value
,
32982 unsigned HOST_WIDE_INT align
)
32985 rtx dst
, addr
, mem
;
32986 rtx val_exp
, val_reg
, reg
;
32988 HOST_WIDE_INT v
= value
;
32990 gcc_assert (align
== 1 || align
== 2);
32993 v
|= (value
<< BITS_PER_UNIT
);
32995 v
= sext_hwi (v
, BITS_PER_WORD
);
32996 val_exp
= GEN_INT (v
);
32997 /* Skip if it isn't profitable. */
32998 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32999 align
, true, false))
33002 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33003 mode
= (align
== 2 ? HImode
: QImode
);
33004 val_reg
= force_reg (SImode
, val_exp
);
33005 reg
= gen_lowpart (mode
, val_reg
);
33007 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
33009 addr
= plus_constant (Pmode
, dst
, i
);
33010 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
33011 emit_move_insn (mem
, reg
);
33014 /* Handle single byte leftover. */
33015 if (i
+ 1 == length
)
33017 reg
= gen_lowpart (QImode
, val_reg
);
33018 addr
= plus_constant (Pmode
, dst
, i
);
33019 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33020 emit_move_insn (mem
, reg
);
33024 gcc_assert (i
== length
);
33028 /* Set a block of memory using plain strd/str/strh/strb instructions,
33029 to permit unaligned copies on processors which support unaligned
33030 semantics for those instructions. We fill the first LENGTH bytes
33031 of the memory area starting from DSTBASE with byte constant VALUE.
33032 ALIGN is the alignment requirement of memory. */
33034 arm_block_set_aligned_non_vect (rtx dstbase
,
33035 unsigned HOST_WIDE_INT length
,
33036 unsigned HOST_WIDE_INT value
,
33037 unsigned HOST_WIDE_INT align
)
33040 rtx dst
, addr
, mem
;
33041 rtx val_exp
, val_reg
, reg
;
33042 unsigned HOST_WIDE_INT v
;
33045 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
33046 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
33048 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
33049 if (length
< UNITS_PER_WORD
)
33050 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
33053 v
|= (v
<< BITS_PER_WORD
);
33055 v
= sext_hwi (v
, BITS_PER_WORD
);
33057 val_exp
= GEN_INT (v
);
33058 /* Skip if it isn't profitable. */
33059 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33060 align
, false, use_strd_p
))
33065 /* Try without strd. */
33066 v
= (v
>> BITS_PER_WORD
);
33067 v
= sext_hwi (v
, BITS_PER_WORD
);
33068 val_exp
= GEN_INT (v
);
33069 use_strd_p
= false;
33070 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
33071 align
, false, use_strd_p
))
33076 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
33077 /* Handle double words using strd if possible. */
33080 val_reg
= force_reg (DImode
, val_exp
);
33082 for (; (i
+ 8 <= length
); i
+= 8)
33084 addr
= plus_constant (Pmode
, dst
, i
);
33085 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
33086 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
33087 emit_move_insn (mem
, reg
);
33089 emit_insn (gen_unaligned_storedi (mem
, reg
));
33093 val_reg
= force_reg (SImode
, val_exp
);
33095 /* Handle words. */
33096 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
33097 for (; (i
+ 4 <= length
); i
+= 4)
33099 addr
= plus_constant (Pmode
, dst
, i
);
33100 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
33101 if ((align
& 3) == 0)
33102 emit_move_insn (mem
, reg
);
33104 emit_insn (gen_unaligned_storesi (mem
, reg
));
33107 /* Merge last pair of STRH and STRB into a STR if possible. */
33108 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
33110 addr
= plus_constant (Pmode
, dst
, i
- 1);
33111 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
33112 /* We are shifting one byte back, set the alignment accordingly. */
33113 if ((align
& 1) == 0)
33114 set_mem_align (mem
, BITS_PER_UNIT
);
33116 /* Most likely this is an unaligned access, and we can't tell at
33117 compilation time. */
33118 emit_insn (gen_unaligned_storesi (mem
, reg
));
33122 /* Handle half word leftover. */
33123 if (i
+ 2 <= length
)
33125 reg
= gen_lowpart (HImode
, val_reg
);
33126 addr
= plus_constant (Pmode
, dst
, i
);
33127 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
33128 if ((align
& 1) == 0)
33129 emit_move_insn (mem
, reg
);
33131 emit_insn (gen_unaligned_storehi (mem
, reg
));
33136 /* Handle single byte leftover. */
33137 if (i
+ 1 == length
)
33139 reg
= gen_lowpart (QImode
, val_reg
);
33140 addr
= plus_constant (Pmode
, dst
, i
);
33141 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
33142 emit_move_insn (mem
, reg
);
33148 /* Set a block of memory using vectorization instructions for both
33149 aligned and unaligned cases. We fill the first LENGTH bytes of
33150 the memory area starting from DSTBASE with byte constant VALUE.
33151 ALIGN is the alignment requirement of memory. */
33153 arm_block_set_vect (rtx dstbase
,
33154 unsigned HOST_WIDE_INT length
,
33155 unsigned HOST_WIDE_INT value
,
33156 unsigned HOST_WIDE_INT align
)
33158 /* Check whether we need to use unaligned store instruction. */
33159 if (((align
& 3) != 0 || (length
& 3) != 0)
33160 /* Check whether unaligned store instruction is available. */
33161 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
33164 if ((align
& 3) == 0)
33165 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
33167 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
33170 /* Expand string store operation. Firstly we try to do that by using
33171 vectorization instructions, then try with ARM unaligned access and
33172 double-word store if profitable. OPERANDS[0] is the destination,
33173 OPERANDS[1] is the number of bytes, operands[2] is the value to
33174 initialize the memory, OPERANDS[3] is the known alignment of the
33177 arm_gen_setmem (rtx
*operands
)
33179 rtx dstbase
= operands
[0];
33180 unsigned HOST_WIDE_INT length
;
33181 unsigned HOST_WIDE_INT value
;
33182 unsigned HOST_WIDE_INT align
;
33184 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
33187 length
= UINTVAL (operands
[1]);
33191 value
= (UINTVAL (operands
[2]) & 0xFF);
33192 align
= UINTVAL (operands
[3]);
33193 if (TARGET_NEON
&& length
>= 8
33194 && current_tune
->string_ops_prefer_neon
33195 && arm_block_set_vect (dstbase
, length
, value
, align
))
33198 if (!unaligned_access
&& (align
& 3) != 0)
33199 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
33201 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
33206 arm_macro_fusion_p (void)
33208 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
33211 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33212 for MOVW / MOVT macro fusion. */
33215 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
33217 /* We are trying to fuse
33218 movw imm / movt imm
33219 instructions as a group that gets scheduled together. */
33221 rtx set_dest
= SET_DEST (curr_set
);
33223 if (GET_MODE (set_dest
) != SImode
)
33226 /* We are trying to match:
33227 prev (movw) == (set (reg r0) (const_int imm16))
33228 curr (movt) == (set (zero_extract (reg r0)
33231 (const_int imm16_1))
33233 prev (movw) == (set (reg r1)
33234 (high (symbol_ref ("SYM"))))
33235 curr (movt) == (set (reg r0)
33237 (symbol_ref ("SYM")))) */
33239 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
33241 if (CONST_INT_P (SET_SRC (curr_set
))
33242 && CONST_INT_P (SET_SRC (prev_set
))
33243 && REG_P (XEXP (set_dest
, 0))
33244 && REG_P (SET_DEST (prev_set
))
33245 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
33249 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
33250 && REG_P (SET_DEST (curr_set
))
33251 && REG_P (SET_DEST (prev_set
))
33252 && GET_CODE (SET_SRC (prev_set
)) == HIGH
33253 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
33260 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
33262 rtx prev_set
= single_set (prev
);
33263 rtx curr_set
= single_set (curr
);
33269 if (any_condjump_p (curr
))
33272 if (!arm_macro_fusion_p ())
33275 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
33276 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
33282 /* Return true iff the instruction fusion described by OP is enabled. */
33284 arm_fusion_enabled_p (tune_params::fuse_ops op
)
33286 return current_tune
->fusible_ops
& op
;
33289 /* Return TRUE if return address signing mechanism is enabled. */
33291 arm_current_function_pac_enabled_p (void)
33293 return (aarch_ra_sign_scope
== AARCH_FUNCTION_ALL
33294 || (aarch_ra_sign_scope
== AARCH_FUNCTION_NON_LEAF
33295 && !crtl
->is_leaf
));
33298 /* Raise an error if the current target arch is not bti compatible. */
33299 void aarch_bti_arch_check (void)
33301 if (!arm_arch8m_main
)
33302 error ("This architecture does not support branch protection instructions");
33305 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33307 aarch_bti_enabled (void)
33309 return aarch_enable_bti
!= 0;
33312 /* Check if INSN is a BTI J insn. */
33314 aarch_bti_j_insn_p (rtx_insn
*insn
)
33316 if (!insn
|| !INSN_P (insn
))
33319 rtx pat
= PATTERN (insn
);
33320 return GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == VUNSPEC_BTI_NOP
;
33323 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33325 aarch_pac_insn_p (rtx x
)
33327 if (!x
|| !INSN_P (x
))
33330 rtx pat
= PATTERN (x
);
33332 if (GET_CODE (pat
) == SET
)
33334 rtx tmp
= XEXP (pat
, 1);
33336 && ((GET_CODE (tmp
) == UNSPEC
33337 && XINT (tmp
, 1) == UNSPEC_PAC_NOP
)
33338 || (GET_CODE (tmp
) == UNSPEC_VOLATILE
33339 && XINT (tmp
, 1) == VUNSPEC_PACBTI_NOP
)))
33346 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33347 For Arm, both of these map to a simple BTI instruction. */
33350 aarch_gen_bti_c (void)
33352 return gen_bti_nop ();
33356 aarch_gen_bti_j (void)
33358 return gen_bti_nop ();
33361 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33362 scheduled for speculative execution. Reject the long-running division
33363 and square-root instructions. */
33366 arm_sched_can_speculate_insn (rtx_insn
*insn
)
33368 switch (get_attr_type (insn
))
33376 case TYPE_NEON_FP_SQRT_S
:
33377 case TYPE_NEON_FP_SQRT_D
:
33378 case TYPE_NEON_FP_SQRT_S_Q
:
33379 case TYPE_NEON_FP_SQRT_D_Q
:
33380 case TYPE_NEON_FP_DIV_S
:
33381 case TYPE_NEON_FP_DIV_D
:
33382 case TYPE_NEON_FP_DIV_S_Q
:
33383 case TYPE_NEON_FP_DIV_D_Q
:
33390 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33392 static unsigned HOST_WIDE_INT
33393 arm_asan_shadow_offset (void)
33395 return HOST_WIDE_INT_1U
<< 29;
33399 /* This is a temporary fix for PR60655. Ideally we need
33400 to handle most of these cases in the generic part but
33401 currently we reject minus (..) (sym_ref). We try to
33402 ameliorate the case with minus (sym_ref1) (sym_ref2)
33403 where they are in the same section. */
33406 arm_const_not_ok_for_debug_p (rtx p
)
33408 tree decl_op0
= NULL
;
33409 tree decl_op1
= NULL
;
33411 if (GET_CODE (p
) == UNSPEC
)
33413 if (GET_CODE (p
) == MINUS
)
33415 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
33417 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
33419 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
33420 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
33422 if ((VAR_P (decl_op1
)
33423 || TREE_CODE (decl_op1
) == CONST_DECL
)
33424 && (VAR_P (decl_op0
)
33425 || TREE_CODE (decl_op0
) == CONST_DECL
))
33426 return (get_variable_section (decl_op1
, false)
33427 != get_variable_section (decl_op0
, false));
33429 if (TREE_CODE (decl_op1
) == LABEL_DECL
33430 && TREE_CODE (decl_op0
) == LABEL_DECL
)
33431 return (DECL_CONTEXT (decl_op1
)
33432 != DECL_CONTEXT (decl_op0
));
33442 /* return TRUE if x is a reference to a value in a constant pool */
33444 arm_is_constant_pool_ref (rtx x
)
33447 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
33448 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
33451 /* Remember the last target of arm_set_current_function. */
33452 static GTY(()) tree arm_previous_fndecl
;
33454 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33457 save_restore_target_globals (tree new_tree
)
33459 /* If we have a previous state, use it. */
33460 if (TREE_TARGET_GLOBALS (new_tree
))
33461 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
33462 else if (new_tree
== target_option_default_node
)
33463 restore_target_globals (&default_target_globals
);
33466 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33467 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
33470 arm_option_params_internal ();
33473 /* Invalidate arm_previous_fndecl. */
33476 arm_reset_previous_fndecl (void)
33478 arm_previous_fndecl
= NULL_TREE
;
33481 /* Establish appropriate back-end context for processing the function
33482 FNDECL. The argument might be NULL to indicate processing at top
33483 level, outside of any function scope. */
33486 arm_set_current_function (tree fndecl
)
33488 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
33491 tree old_tree
= (arm_previous_fndecl
33492 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
33495 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33497 /* If current function has no attributes but previous one did,
33498 use the default node. */
33499 if (! new_tree
&& old_tree
)
33500 new_tree
= target_option_default_node
;
33502 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33503 the default have been handled by save_restore_target_globals from
33504 arm_pragma_target_parse. */
33505 if (old_tree
== new_tree
)
33508 arm_previous_fndecl
= fndecl
;
33510 /* First set the target options. */
33511 cl_target_option_restore (&global_options
, &global_options_set
,
33512 TREE_TARGET_OPTION (new_tree
));
33514 save_restore_target_globals (new_tree
);
33516 arm_override_options_after_change_1 (&global_options
, &global_options_set
);
33519 /* Implement TARGET_OPTION_PRINT. */
33522 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
33524 int flags
= ptr
->x_target_flags
;
33525 const char *fpu_name
;
33527 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
33528 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
33530 fprintf (file
, "%*sselected isa %s\n", indent
, "",
33531 TARGET_THUMB2_P (flags
) ? "thumb2" :
33532 TARGET_THUMB_P (flags
) ? "thumb1" :
33535 if (ptr
->x_arm_arch_string
)
33536 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
33537 ptr
->x_arm_arch_string
);
33539 if (ptr
->x_arm_cpu_string
)
33540 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
33541 ptr
->x_arm_cpu_string
);
33543 if (ptr
->x_arm_tune_string
)
33544 fprintf (file
, "%*sselected tune %s\n", indent
, "",
33545 ptr
->x_arm_tune_string
);
33547 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
33550 /* Hook to determine if one function can safely inline another. */
33553 arm_can_inline_p (tree caller
, tree callee
)
33555 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
33556 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
33557 bool can_inline
= true;
33559 struct cl_target_option
*caller_opts
33560 = TREE_TARGET_OPTION (caller_tree
? caller_tree
33561 : target_option_default_node
);
33563 struct cl_target_option
*callee_opts
33564 = TREE_TARGET_OPTION (callee_tree
? callee_tree
33565 : target_option_default_node
);
33567 if (callee_opts
== caller_opts
)
33570 /* Callee's ISA features should be a subset of the caller's. */
33571 struct arm_build_target caller_target
;
33572 struct arm_build_target callee_target
;
33573 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
33574 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
33576 arm_configure_build_target (&caller_target
, caller_opts
, false);
33577 arm_configure_build_target (&callee_target
, callee_opts
, false);
33578 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
33579 can_inline
= false;
33581 sbitmap_free (caller_target
.isa
);
33582 sbitmap_free (callee_target
.isa
);
33584 /* OK to inline between different modes.
33585 Function with mode specific instructions, e.g using asm,
33586 must be explicitly protected with noinline. */
33590 /* Hook to fix function's alignment affected by target attribute. */
33593 arm_relayout_function (tree fndecl
)
33595 if (DECL_USER_ALIGN (fndecl
))
33598 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33601 callee_tree
= target_option_default_node
;
33603 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
33606 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
33609 /* Inner function to process the attribute((target(...))), take an argument and
33610 set the current options from the argument. If we have a list, recursively
33611 go over the list. */
33614 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
33616 if (TREE_CODE (args
) == TREE_LIST
)
33620 for (; args
; args
= TREE_CHAIN (args
))
33621 if (TREE_VALUE (args
)
33622 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
33627 else if (TREE_CODE (args
) != STRING_CST
)
33629 error ("attribute %<target%> argument not a string");
33633 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
33636 while ((q
= strtok (argstr
, ",")) != NULL
)
33639 if (!strcmp (q
, "thumb"))
33641 opts
->x_target_flags
|= MASK_THUMB
;
33642 if (TARGET_FDPIC
&& !arm_arch_thumb2
)
33643 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33646 else if (!strcmp (q
, "arm"))
33647 opts
->x_target_flags
&= ~MASK_THUMB
;
33649 else if (!strcmp (q
, "general-regs-only"))
33650 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
33652 else if (startswith (q
, "fpu="))
33655 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
33656 &fpu_index
, CL_TARGET
))
33658 error ("invalid fpu for target attribute or pragma %qs", q
);
33661 if (fpu_index
== TARGET_FPU_auto
)
33663 /* This doesn't really make sense until we support
33664 general dynamic selection of the architecture and all
33666 sorry ("auto fpu selection not currently permitted here");
33669 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
33671 else if (startswith (q
, "arch="))
33673 char *arch
= q
+ 5;
33674 const arch_option
*arm_selected_arch
33675 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
33677 if (!arm_selected_arch
)
33679 error ("invalid architecture for target attribute or pragma %qs",
33684 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
33686 else if (q
[0] == '+')
33688 opts
->x_arm_arch_string
33689 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
33693 error ("unknown target attribute or pragma %qs", q
);
33701 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33704 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
33705 struct gcc_options
*opts_set
)
33707 struct cl_target_option cl_opts
;
33709 if (!arm_valid_target_attribute_rec (args
, opts
))
33712 cl_target_option_save (&cl_opts
, opts
, opts_set
);
33713 arm_configure_build_target (&arm_active_target
, &cl_opts
, false);
33714 arm_option_check_internal (opts
);
33715 /* Do any overrides, such as global options arch=xxx.
33716 We do this since arm_active_target was overridden. */
33717 arm_option_reconfigure_globals ();
33718 arm_options_perform_arch_sanity_checks ();
33719 arm_option_override_internal (opts
, opts_set
);
33721 return build_target_option_node (opts
, opts_set
);
33725 add_attribute (const char * mode
, tree
*attributes
)
33727 size_t len
= strlen (mode
);
33728 tree value
= build_string (len
, mode
);
33730 TREE_TYPE (value
) = build_array_type (char_type_node
,
33731 build_index_type (size_int (len
)));
33733 *attributes
= tree_cons (get_identifier ("target"),
33734 build_tree_list (NULL_TREE
, value
),
33738 /* For testing. Insert thumb or arm modes alternatively on functions. */
33741 arm_insert_attributes (tree fndecl
, tree
* attributes
)
33745 if (! TARGET_FLIP_THUMB
)
33748 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
33749 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
33752 /* Nested definitions must inherit mode. */
33753 if (current_function_decl
)
33755 mode
= TARGET_THUMB
? "thumb" : "arm";
33756 add_attribute (mode
, attributes
);
33760 /* If there is already a setting don't change it. */
33761 if (lookup_attribute ("target", *attributes
) != NULL
)
33764 mode
= thumb_flipper
? "thumb" : "arm";
33765 add_attribute (mode
, attributes
);
33767 thumb_flipper
= !thumb_flipper
;
33770 /* Hook to validate attribute((target("string"))). */
33773 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
33774 tree args
, int ARG_UNUSED (flags
))
33777 struct gcc_options func_options
, func_options_set
;
33778 tree cur_tree
, new_optimize
;
33779 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
33781 /* Get the optimization options of the current function. */
33782 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
33784 /* If the function changed the optimization levels as well as setting target
33785 options, start with the optimizations specified. */
33786 if (!func_optimize
)
33787 func_optimize
= optimization_default_node
;
33789 /* Init func_options. */
33790 memset (&func_options
, 0, sizeof (func_options
));
33791 init_options_struct (&func_options
, NULL
);
33792 lang_hooks
.init_options_struct (&func_options
);
33793 memset (&func_options_set
, 0, sizeof (func_options_set
));
33795 /* Initialize func_options to the defaults. */
33796 cl_optimization_restore (&func_options
, &func_options_set
,
33797 TREE_OPTIMIZATION (func_optimize
));
33799 cl_target_option_restore (&func_options
, &func_options_set
,
33800 TREE_TARGET_OPTION (target_option_default_node
));
33802 /* Set func_options flags with new target mode. */
33803 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
33804 &func_options_set
);
33806 if (cur_tree
== NULL_TREE
)
33809 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
33811 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
33813 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
33818 /* Match an ISA feature bitmap to a named FPU. We always use the
33819 first entry that exactly matches the feature set, so that we
33820 effectively canonicalize the FPU name for the assembler. */
33822 arm_identify_fpu_from_isa (sbitmap isa
)
33824 auto_sbitmap
fpubits (isa_num_bits
);
33825 auto_sbitmap
cand_fpubits (isa_num_bits
);
33827 bitmap_and (fpubits
, isa
, isa_all_fpubits_internal
);
33829 /* If there are no ISA feature bits relating to the FPU, we must be
33830 doing soft-float. */
33831 if (bitmap_empty_p (fpubits
))
33834 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
33836 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
33837 if (bitmap_equal_p (fpubits
, cand_fpubits
))
33838 return all_fpus
[i
].name
;
33840 /* We must find an entry, or things have gone wrong. */
33841 gcc_unreachable ();
33844 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33845 by the function fndecl. */
33847 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
33849 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
33851 struct cl_target_option
*targ_options
;
33853 targ_options
= TREE_TARGET_OPTION (target_parts
);
33855 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
33856 gcc_assert (targ_options
);
33858 arm_print_asm_arch_directives (stream
, targ_options
);
33860 fprintf (stream
, "\t.syntax unified\n");
33864 if (is_called_in_ARM_mode (decl
)
33865 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
33866 && cfun
->is_thunk
))
33867 fprintf (stream
, "\t.code 32\n");
33868 else if (TARGET_THUMB1
)
33869 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
33871 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
33874 fprintf (stream
, "\t.arm\n");
33876 if (TARGET_POKE_FUNCTION_NAME
)
33877 arm_poke_function_name (stream
, (const char *) name
);
33880 /* If MEM is in the form of [base+offset], extract the two parts
33881 of address and set to BASE and OFFSET, otherwise return false
33882 after clearing BASE and OFFSET. */
33885 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
33889 gcc_assert (MEM_P (mem
));
33891 addr
= XEXP (mem
, 0);
33893 /* Strip off const from addresses like (const (addr)). */
33894 if (GET_CODE (addr
) == CONST
)
33895 addr
= XEXP (addr
, 0);
33900 *offset
= const0_rtx
;
33904 if (GET_CODE (addr
) == PLUS
33905 && GET_CODE (XEXP (addr
, 0)) == REG
33906 && CONST_INT_P (XEXP (addr
, 1)))
33908 *base
= XEXP (addr
, 0);
33909 *offset
= XEXP (addr
, 1);
33914 *offset
= NULL_RTX
;
33919 /* If INSN is a load or store of address in the form of [base+offset],
33920 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33921 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33922 otherwise return FALSE. */
33925 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
33929 gcc_assert (INSN_P (insn
));
33930 x
= PATTERN (insn
);
33931 if (GET_CODE (x
) != SET
)
33935 dest
= SET_DEST (x
);
33936 if (REG_P (src
) && MEM_P (dest
))
33939 extract_base_offset_in_addr (dest
, base
, offset
);
33941 else if (MEM_P (src
) && REG_P (dest
))
33944 extract_base_offset_in_addr (src
, base
, offset
);
33949 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
33952 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33954 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
33955 and PRI are only calculated for these instructions. For other instruction,
33956 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
33957 instruction fusion can be supported by returning different priorities.
33959 It's important that irrelevant instructions get the largest FUSION_PRI. */
33962 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
33963 int *fusion_pri
, int *pri
)
33969 gcc_assert (INSN_P (insn
));
33972 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
33979 /* Load goes first. */
33981 *fusion_pri
= tmp
- 1;
33983 *fusion_pri
= tmp
- 2;
33987 /* INSN with smaller base register goes first. */
33988 tmp
-= ((REGNO (base
) & 0xff) << 20);
33990 /* INSN with smaller offset goes first. */
33991 off_val
= (int)(INTVAL (offset
));
33993 tmp
-= (off_val
& 0xfffff);
33995 tmp
+= ((- off_val
) & 0xfffff);
34002 /* Construct and return a PARALLEL RTX vector with elements numbering the
34003 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34004 the vector - from the perspective of the architecture. This does not
34005 line up with GCC's perspective on lane numbers, so we end up with
34006 different masks depending on our target endian-ness. The diagram
34007 below may help. We must draw the distinction when building masks
34008 which select one half of the vector. An instruction selecting
34009 architectural low-lanes for a big-endian target, must be described using
34010 a mask selecting GCC high-lanes.
34012 Big-Endian Little-Endian
34014 GCC 0 1 2 3 3 2 1 0
34015 | x | x | x | x | | x | x | x | x |
34016 Architecture 3 2 1 0 3 2 1 0
34018 Low Mask: { 2, 3 } { 0, 1 }
34019 High Mask: { 0, 1 } { 2, 3 }
34023 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
34025 int nunits
= GET_MODE_NUNITS (mode
);
34026 rtvec v
= rtvec_alloc (nunits
/ 2);
34027 int high_base
= nunits
/ 2;
34033 if (BYTES_BIG_ENDIAN
)
34034 base
= high
? low_base
: high_base
;
34036 base
= high
? high_base
: low_base
;
34038 for (i
= 0; i
< nunits
/ 2; i
++)
34039 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
34041 t1
= gen_rtx_PARALLEL (mode
, v
);
34045 /* Check OP for validity as a PARALLEL RTX vector with elements
34046 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34047 from the perspective of the architecture. See the diagram above
34048 arm_simd_vect_par_cnst_half_p for more details. */
34051 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
34054 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
34055 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
34056 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
34059 if (!VECTOR_MODE_P (mode
))
34062 if (count_op
!= count_ideal
)
34065 for (i
= 0; i
< count_ideal
; i
++)
34067 rtx elt_op
= XVECEXP (op
, 0, i
);
34068 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
34070 if (!CONST_INT_P (elt_op
)
34071 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
34077 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34080 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
34083 /* For now, we punt and not handle this for TARGET_THUMB1. */
34084 if (vcall_offset
&& TARGET_THUMB1
)
34087 /* Otherwise ok. */
34091 /* Generate RTL for a conditional branch with rtx comparison CODE in
34092 mode CC_MODE. The destination of the unlikely conditional branch
34096 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
34100 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
34101 gen_rtx_REG (cc_mode
, CC_REGNUM
),
34104 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
34105 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
34107 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
34110 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34112 For pure-code sections there is no letter code for this attribute, so
34113 output all the section flags numerically when this is needed. */
34116 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
34119 if (flags
& SECTION_ARM_PURECODE
)
34123 if (!(flags
& SECTION_DEBUG
))
34125 if (flags
& SECTION_EXCLUDE
)
34126 *num
|= 0x80000000;
34127 if (flags
& SECTION_WRITE
)
34129 if (flags
& SECTION_CODE
)
34131 if (flags
& SECTION_MERGE
)
34133 if (flags
& SECTION_STRINGS
)
34135 if (flags
& SECTION_TLS
)
34137 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
34146 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34148 If pure-code is passed as an option, make sure all functions are in
34149 sections that have the SHF_ARM_PURECODE attribute. */
34152 arm_function_section (tree decl
, enum node_frequency freq
,
34153 bool startup
, bool exit
)
34155 const char * section_name
;
34158 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
34159 return default_function_section (decl
, freq
, startup
, exit
);
34161 if (!target_pure_code
)
34162 return default_function_section (decl
, freq
, startup
, exit
);
34165 section_name
= DECL_SECTION_NAME (decl
);
34167 /* If a function is not in a named section then it falls under the 'default'
34168 text section, also known as '.text'. We can preserve previous behavior as
34169 the default text section already has the SHF_ARM_PURECODE section
34173 section
*default_sec
= default_function_section (decl
, freq
, startup
,
34176 /* If default_sec is not null, then it must be a special section like for
34177 example .text.startup. We set the pure-code attribute and return the
34178 same section to preserve existing behavior. */
34180 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34181 return default_sec
;
34184 /* Otherwise look whether a section has already been created with
34186 sec
= get_named_section (decl
, section_name
, 0);
34188 /* If that is not the case passing NULL as the section's name to
34189 'get_named_section' will create a section with the declaration's
34191 sec
= get_named_section (decl
, NULL
, 0);
34193 /* Set the SHF_ARM_PURECODE attribute. */
34194 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
34199 /* Implements the TARGET_SECTION_FLAGS hook.
34201 If DECL is a function declaration and pure-code is passed as an option
34202 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
34203 section's name and RELOC indicates whether the declarations initializer may
34204 contain runtime relocations. */
34206 static unsigned int
34207 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
34209 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
34211 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
34212 flags
|= SECTION_ARM_PURECODE
;
34217 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34220 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
34222 rtx
*quot_p
, rtx
*rem_p
)
34224 if (mode
== SImode
)
34225 gcc_assert (!TARGET_IDIV
);
34227 scalar_int_mode libval_mode
34228 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
34230 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
34231 libval_mode
, op0
, mode
, op1
, mode
);
34233 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
34234 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
34235 GET_MODE_SIZE (mode
));
34237 gcc_assert (quotient
);
34238 gcc_assert (remainder
);
34240 *quot_p
= quotient
;
34241 *rem_p
= remainder
;
34244 /* This function checks for the availability of the coprocessor builtin passed
34245 in BUILTIN for the current target. Returns true if it is available and
34246 false otherwise. If a BUILTIN is passed for which this function has not
34247 been implemented it will cause an exception. */
34250 arm_coproc_builtin_available (enum unspecv builtin
)
34252 /* None of these builtins are available in Thumb mode if the target only
34253 supports Thumb-1. */
34271 case VUNSPEC_LDC2L
:
34273 case VUNSPEC_STC2L
:
34276 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34283 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34285 if (arm_arch6
|| arm_arch5te
)
34288 case VUNSPEC_MCRR2
:
34289 case VUNSPEC_MRRC2
:
34294 gcc_unreachable ();
34299 /* This function returns true if OP is a valid memory operand for the ldc and
34300 stc coprocessor instructions and false otherwise. */
34303 arm_coproc_ldc_stc_legitimate_address (rtx op
)
34305 HOST_WIDE_INT range
;
34306 /* Has to be a memory operand. */
34312 /* We accept registers. */
34316 switch GET_CODE (op
)
34320 /* Or registers with an offset. */
34321 if (!REG_P (XEXP (op
, 0)))
34326 /* The offset must be an immediate though. */
34327 if (!CONST_INT_P (op
))
34330 range
= INTVAL (op
);
34332 /* Within the range of [-1020,1020]. */
34333 if (!IN_RANGE (range
, -1020, 1020))
34336 /* And a multiple of 4. */
34337 return (range
% 4) == 0;
34343 return REG_P (XEXP (op
, 0));
34345 gcc_unreachable ();
34350 /* Return the diagnostic message string if conversion from FROMTYPE to
34351 TOTYPE is not allowed, NULL otherwise. */
34353 static const char *
34354 arm_invalid_conversion (const_tree fromtype
, const_tree totype
)
34356 if (element_mode (fromtype
) != element_mode (totype
))
34358 /* Do no allow conversions to/from BFmode scalar types. */
34359 if (TYPE_MODE (fromtype
) == BFmode
)
34360 return N_("invalid conversion from type %<bfloat16_t%>");
34361 if (TYPE_MODE (totype
) == BFmode
)
34362 return N_("invalid conversion to type %<bfloat16_t%>");
34365 /* Conversion allowed. */
34369 /* Return the diagnostic message string if the unary operation OP is
34370 not permitted on TYPE, NULL otherwise. */
34372 static const char *
34373 arm_invalid_unary_op (int op
, const_tree type
)
34375 /* Reject all single-operand operations on BFmode except for &. */
34376 if (element_mode (type
) == BFmode
&& op
!= ADDR_EXPR
)
34377 return N_("operation not permitted on type %<bfloat16_t%>");
34379 /* Operation allowed. */
34383 /* Return the diagnostic message string if the binary operation OP is
34384 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34386 static const char *
34387 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
,
34390 /* Reject all 2-operand operations on BFmode. */
34391 if (element_mode (type1
) == BFmode
34392 || element_mode (type2
) == BFmode
)
34393 return N_("operation not permitted on type %<bfloat16_t%>");
34395 /* Operation allowed. */
34399 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34401 In VFPv1, VFP registers could only be accessed in the mode they were
34402 set, so subregs would be invalid there. However, we don't support
34403 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34405 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34406 VFP registers in little-endian order. We can't describe that accurately to
34407 GCC, so avoid taking subregs of such values.
34409 The only exception is going from a 128-bit to a 64-bit type. In that
34410 case the data layout happens to be consistent for big-endian, so we
34411 explicitly allow that case. */
34414 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
34415 reg_class_t rclass
)
34418 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
34419 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
34420 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
34421 && reg_classes_intersect_p (VFP_REGS
, rclass
))
34426 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34427 strcpy from constants will be faster. */
34429 static HOST_WIDE_INT
34430 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
34432 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
34433 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
34434 return MAX (align
, BITS_PER_WORD
* factor
);
34438 /* Emit a speculation barrier on target architectures that do not have
34439 DSB/ISB directly. Such systems probably don't need a barrier
34440 themselves, but if the code is ever run on a later architecture, it
34441 might become a problem. */
34443 arm_emit_speculation_barrier_function ()
34445 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
34448 /* Have we recorded an explicit access to the Q bit of APSR?. */
34450 arm_q_bit_access (void)
34452 if (cfun
&& cfun
->decl
)
34453 return lookup_attribute ("acle qbit",
34454 DECL_ATTRIBUTES (cfun
->decl
));
34458 /* Have we recorded an explicit access to the GE bits of PSTATE?. */
34460 arm_ge_bits_access (void)
34462 if (cfun
&& cfun
->decl
)
34463 return lookup_attribute ("acle gebits",
34464 DECL_ATTRIBUTES (cfun
->decl
));
34468 /* NULL if insn INSN is valid within a low-overhead loop.
34469 Otherwise return why doloop cannot be applied. */
34471 static const char *
34472 arm_invalid_within_doloop (const rtx_insn
*insn
)
34474 if (!TARGET_HAVE_LOB
)
34475 return default_invalid_within_doloop (insn
);
34478 return "Function call in the loop.";
34480 if (reg_mentioned_p (gen_rtx_REG (SImode
, LR_REGNUM
), insn
))
34481 return "LR is used inside loop.";
34487 arm_target_insn_ok_for_lob (rtx insn
)
34489 basic_block bb
= BLOCK_FOR_INSN (insn
);
34490 /* Make sure the basic block of the target insn is a simple latch
34491 having as single predecessor and successor the body of the loop
34492 itself. Only simple loops with a single basic block as body are
34493 supported for 'low over head loop' making sure that LE target is
34494 above LE itself in the generated code. */
34496 return single_succ_p (bb
)
34497 && single_pred_p (bb
)
34498 && single_succ_edge (bb
)->dest
== single_pred_edge (bb
)->src
34499 && contains_no_active_insn_p (bb
);
34503 namespace selftest
{
34505 /* Scan the static data tables generated by parsecpu.awk looking for
34506 potential issues with the data. We primarily check for
34507 inconsistencies in the option extensions at present (extensions
34508 that duplicate others but aren't marked as aliases). Furthermore,
34509 for correct canonicalization later options must never be a subset
34510 of an earlier option. Any extension should also only specify other
34511 feature bits and never an architecture bit. The architecture is inferred
34512 from the declaration of the extension. */
34514 arm_test_cpu_arch_data (void)
34516 const arch_option
*arch
;
34517 const cpu_option
*cpu
;
34518 auto_sbitmap
target_isa (isa_num_bits
);
34519 auto_sbitmap
isa1 (isa_num_bits
);
34520 auto_sbitmap
isa2 (isa_num_bits
);
34522 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
34524 const cpu_arch_extension
*ext1
, *ext2
;
34526 if (arch
->common
.extensions
== NULL
)
34529 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34531 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34536 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34537 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34539 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34542 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34543 /* If the option is a subset of the parent option, it doesn't
34544 add anything and so isn't useful. */
34545 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34547 /* If the extension specifies any architectural bits then
34548 disallow it. Extensions should only specify feature bits. */
34549 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34554 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
34556 const cpu_arch_extension
*ext1
, *ext2
;
34558 if (cpu
->common
.extensions
== NULL
)
34561 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34563 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34568 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34569 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34571 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34574 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34575 /* If the option is a subset of the parent option, it doesn't
34576 add anything and so isn't useful. */
34577 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34579 /* If the extension specifies any architectural bits then
34580 disallow it. Extensions should only specify feature bits. */
34581 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34587 /* Scan the static data tables generated by parsecpu.awk looking for
34588 potential issues with the data. Here we check for consistency between the
34589 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34590 a feature bit that is not defined by any FPU flag. */
34592 arm_test_fpu_data (void)
34594 auto_sbitmap
isa_all_fpubits_internal (isa_num_bits
);
34595 auto_sbitmap
fpubits (isa_num_bits
);
34596 auto_sbitmap
tmpset (isa_num_bits
);
34598 static const enum isa_feature fpu_bitlist_internal
[]
34599 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
34600 arm_initialize_isa (isa_all_fpubits_internal
, fpu_bitlist_internal
);
34602 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
34604 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
34605 bitmap_and_compl (tmpset
, isa_all_fpubits_internal
, fpubits
);
34606 bitmap_clear (isa_all_fpubits_internal
);
34607 bitmap_copy (isa_all_fpubits_internal
, tmpset
);
34610 if (!bitmap_empty_p (isa_all_fpubits_internal
))
34612 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
34613 " group that are not defined by any FPU.\n"
34614 " Check your arm-cpus.in.\n");
34615 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal
));
34620 arm_run_selftests (void)
34622 arm_test_cpu_arch_data ();
34623 arm_test_fpu_data ();
34625 } /* Namespace selftest. */
34627 #undef TARGET_RUN_TARGET_SELFTESTS
34628 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34629 #endif /* CHECKING_P */
34631 /* Implement TARGET_STACK_PROTECT_GUARD. In case of a
34632 global variable based guard use the default else
34633 return a null tree. */
34635 arm_stack_protect_guard (void)
34637 if (arm_stack_protector_guard
== SSP_GLOBAL
)
34638 return default_stack_protect_guard ();
34643 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34644 Unlike the arm version, we do NOT implement asm flag outputs. */
34647 thumb1_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> & /*inputs*/,
34648 vec
<machine_mode
> & /*input_modes*/,
34649 vec
<const char *> &constraints
, vec
<rtx
> & /*clobbers*/,
34650 HARD_REG_SET
& /*clobbered_regs*/, location_t
/*loc*/)
34652 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
34653 if (startswith (constraints
[i
], "=@cc"))
34655 sorry ("%<asm%> flags not supported in thumb1 mode");
34661 /* Generate code to enable conditional branches in functions over 1 MiB.
34663 operands: is the operands list of the asm insn (see arm_cond_branch or
34664 arm_cond_branch_reversed).
34665 pos_label: is an index into the operands array where operands[pos_label] is
34666 the asm label of the final jump destination.
34667 dest: is a string which is used to generate the asm label of the intermediate
34669 branch_format: is a string denoting the intermediate branch format, e.g.
34670 "beq", "bne", etc. */
34673 arm_gen_far_branch (rtx
* operands
, int pos_label
, const char * dest
,
34674 const char * branch_format
)
34676 rtx_code_label
* tmp_label
= gen_label_rtx ();
34677 char label_buf
[256];
34679 ASM_GENERATE_INTERNAL_LABEL (label_buf
, dest
, \
34680 CODE_LABEL_NUMBER (tmp_label
));
34681 const char *label_ptr
= arm_strip_name_encoding (label_buf
);
34682 rtx dest_label
= operands
[pos_label
];
34683 operands
[pos_label
] = tmp_label
;
34685 snprintf (buffer
, sizeof (buffer
), "%s%s", branch_format
, label_ptr
);
34686 output_asm_insn (buffer
, operands
);
34688 snprintf (buffer
, sizeof (buffer
), "b\t%%l0%d\n%s:", pos_label
, label_ptr
);
34689 operands
[pos_label
] = dest_label
;
34690 output_asm_insn (buffer
, operands
);
34694 /* If given mode matches, load from memory to LO_REGS.
34695 (i.e [Rn], Rn <= LO_REGS). */
34697 arm_mode_base_reg_class (machine_mode mode
)
34699 if (TARGET_HAVE_MVE
34700 && (mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
))
34703 return MODE_BASE_REG_REG_CLASS (mode
);
34706 struct gcc_target targetm
= TARGET_INITIALIZER
;
34708 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34711 arm_get_mask_mode (machine_mode mode
)
34713 if (TARGET_HAVE_MVE
)
34714 return arm_mode_to_pred_mode (mode
);
34716 return default_get_mask_mode (mode
);
34719 /* Output assembly to read the thread pointer from the appropriate TPIDR
34720 register into DEST. If PRED_P also emit the %? that can be used to
34721 output the predication code. */
34724 arm_output_load_tpidr (rtx dst
, bool pred_p
)
34727 int tpidr_coproc_num
= -1;
34728 switch (target_thread_pointer
)
34731 tpidr_coproc_num
= 2;
34734 tpidr_coproc_num
= 3;
34737 tpidr_coproc_num
= 4;
34740 gcc_unreachable ();
34742 snprintf (buf
, sizeof (buf
),
34743 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34744 pred_p
? "%?" : "", tpidr_coproc_num
);
34745 output_asm_insn (buf
, &dst
);
34749 #include "gt-arm.h"