1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 #include "tree-vectorizer.h"
74 #include "opts.h"
75
76 /* This file should be included last. */
77 #include "target-def.h"
78
79 /* Forward definitions of types. */
80 typedef struct minipool_node Mnode;
81 typedef struct minipool_fixup Mfix;
82
83 void (*arm_lang_output_object_attributes_hook)(void);
84
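/* A quadruple of integers; used by optimal_immediate_sequence below to
   return a sequence of up to four immediate values.  */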
85 struct four_ints
86 {
87 int i[4];
88 };
89
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx);
92 static int arm_needs_doubleword_align (machine_mode, const_tree);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets *arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
98 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap);
101 static int arm_address_register_rtx_p (rtx, int);
102 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
103 static bool is_called_in_ARM_mode (tree);
104 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
105 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
106 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
107 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
108 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
109 inline static int thumb1_index_register_rtx_p (rtx, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx, int);
115 static void arm_print_operand_address (FILE *, machine_mode, rtx);
116 static bool arm_print_operand_punct_valid_p (unsigned char code);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
118 static arm_cc get_arm_condition_code (rtx);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx *, const char *, const char *,
121 int, HOST_WIDE_INT);
122 static const char *shift_op (rtx, HOST_WIDE_INT *);
123 static struct machine_function *arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
126 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_forward_ref (Mfix *);
128 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_backward_ref (Mfix *);
130 static void assign_minipool_offsets (Mfix *);
131 static void arm_print_value (FILE *, rtx);
132 static void dump_minipool (rtx_insn *);
133 static int arm_barrier_cost (rtx_insn *);
134 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
135 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
136 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
137 machine_mode, rtx);
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree);
143 static unsigned long arm_compute_func_type (void);
144 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
145 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
149 #endif
150 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
151 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree, const_tree);
155 static void arm_set_default_type_attributes (tree);
156 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code,
159 unsigned HOST_WIDE_INT val,
160 struct four_ints *return_sequence);
161 static int optimal_immediate_sequence_1 (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence,
164 int i);
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree, tree);
167 static machine_mode arm_promote_function_mode (const_tree,
168 machine_mode, int *,
169 const_tree, int);
170 static bool arm_return_in_memory (const_tree, const_tree);
171 static rtx arm_function_value (const_tree, const_tree, bool);
172 static rtx arm_libcall_value_1 (machine_mode);
173 static rtx arm_libcall_value (machine_mode, const_rtx);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
177 tree);
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode, rtx);
180 static bool arm_legitimate_constant_p (machine_mode, rtx);
181 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn *, bool);
183 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
184 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
185 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
186 static void emit_constant_insn (rtx cond, rtx pattern);
187 static rtx_insn *emit_set_insn (rtx, rtx);
188 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
193 static int arm_arg_partial_bytes (cumulative_args_t,
194 const function_arg_info &);
195 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
196 static void arm_function_arg_advance (cumulative_args_t,
197 const function_arg_info &);
198 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
199 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
200 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
201 const_tree);
202 static rtx aapcs_libcall_value (machine_mode);
203 static int aapcs_select_return_coproc (const_tree, const_tree);
204
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
207 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
208 #endif
209 #ifndef ARM_PE
210 static void arm_encode_section_info (tree, rtx, int);
211 #endif
212
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree, tree *);
216
217 static void arm_setup_incoming_varargs (cumulative_args_t,
218 const function_arg_info &, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t,
220 const function_arg_info &);
221 static bool arm_promote_prototypes (const_tree);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree);
225 static bool arm_must_pass_in_stack (const function_arg_info &);
226 static bool arm_return_in_memory (const_tree, const_tree);
227 #if ARM_UNWIND_INFO
228 static void arm_unwind_emit (FILE *, rtx_insn *);
229 static bool arm_output_ttype (rtx);
230 static void arm_asm_emit_except_personality (rtx);
231 #endif
232 static void arm_asm_init_sections (void);
233 static rtx arm_dwarf_register_span (rtx);
234
235 static tree arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree arm_get_cookie_size (tree);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree, rtx);
247 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
250 struct cl_target_option *);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option *);
253 static void arm_set_current_function (tree);
254 static bool arm_can_inline_p (tree, tree);
255 static void arm_relayout_function (tree);
256 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
257 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
258 static bool arm_sched_can_speculate_insn (rtx_insn *);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn *);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static tree arm_promoted_type (const_tree t);
270 static bool arm_scalar_mode_supported_p (scalar_mode);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx, tree, rtx);
275 static rtx arm_trampoline_adjust_address (rtx);
276 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool arm_array_mode_supported_p (machine_mode,
281 unsigned HOST_WIDE_INT);
282 static machine_mode arm_preferred_simd_mode (scalar_mode);
283 static bool arm_class_likely_spilled_p (reg_class_t);
284 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
285 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
287 const_tree type,
288 int misalignment,
289 bool is_packed);
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
292 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
298
299 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
300 rtx, const vec_perm_indices &);
301
302 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
303
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
305 tree vectype,
306 int misalign ATTRIBUTE_UNUSED);
307
308 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
309 bool op0_preserve_value);
310 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
311
312 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
314 const_tree);
315 static section *arm_function_section (tree, enum node_frequency, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
317 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
318 int reloc);
319 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
320 static opt_scalar_float_mode arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
323 static bool arm_modes_tieable_p (machine_mode, machine_mode);
324 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
325 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
326 vec<machine_mode> &,
327 vec<const char *> &, vec<rtx> &,
328 HARD_REG_SET &, location_t);
329 static const char *arm_identify_fpu_from_isa (sbitmap);
330 \f
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table[] =
333 {
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
338 call. */
339 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
340 /* Whereas these functions are always known to reside within the 26 bit
341 addressing range. */
342 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
345 NULL },
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute, NULL },
353 #ifdef ARM_PE
354 /* ARM/PE has three new attributes:
355 interfacearm - ?
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
358
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
361 multiple times.
362 */
363 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
364 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute, NULL },
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
371 NULL },
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute, NULL },
374 #endif
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry, NULL },
378 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
379 arm_handle_cmse_nonsecure_call, NULL },
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
381 { NULL, 0, 0, false, false, false, false, NULL, NULL }
382 };
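/* For reference, these target attributes appear in user code as, e.g.:
     void far_fn (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));
   (illustrative declarations only; the function names are arbitrary).  */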
383 \f
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
388 #endif
389
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
392
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
395
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
398
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
401
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
406
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
411
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
418
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
421
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
424
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
427
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
430
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
433
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
436
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
439
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
442
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
445
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
448
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
451
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
454
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
457
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
460
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
463
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
466
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
469
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
472
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
475
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
478
479 #undef TARGET_ENCODE_SECTION_INFO
480 #ifdef ARM_PE
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
482 #else
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
484 #endif
485
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
488
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
491
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
494
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
497
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
500
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
503
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
506
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
511
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
518
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
530
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
533
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
540
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
543
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
560
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
563
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
566
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
573
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
576
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
579
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
582
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
585
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
588
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
591
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
594
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
597
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
600
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
603
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
606
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
610
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
613
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
616
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
619
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
622
623 #if ARM_UNWIND_INFO
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
626
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
630
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
633
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
636
637 #endif /* ARM_UNWIND_INFO */
638
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
641
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
644
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
647
648 #ifdef HAVE_AS_TLS
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
651 #endif
652
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
655
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
658
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
661
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
664
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
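/* That is, an anchor can reach offsets in [-4088, +4095], a span of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight.  */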
670
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
673
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
676
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
680
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
684
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
687
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
690
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
693
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
696
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
699
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
706
707 #ifdef HAVE_AS_TLS
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
710 #endif
711
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
714
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
717
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
720
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
723
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
726
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
729
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
732
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
735
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
738
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
741
742 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
743 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
744 arm_builtin_vectorized_function
745
746 #undef TARGET_VECTOR_ALIGNMENT
747 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
748
749 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
750 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
751 arm_vector_alignment_reachable
752
753 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
754 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
755 arm_builtin_support_vector_misalignment
756
757 #undef TARGET_PREFERRED_RENAME_CLASS
758 #define TARGET_PREFERRED_RENAME_CLASS \
759 arm_preferred_rename_class
760
761 #undef TARGET_VECTORIZE_VEC_PERM_CONST
762 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
763
764 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
765 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
766 arm_builtin_vectorization_cost
767
768 #undef TARGET_CANONICALIZE_COMPARISON
769 #define TARGET_CANONICALIZE_COMPARISON \
770 arm_canonicalize_comparison
771
772 #undef TARGET_ASAN_SHADOW_OFFSET
773 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
774
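/* Maximum number of conditional instructions permitted in an IT block:
   one when -mrestrict-it is in effect, otherwise four.  */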
775 #undef MAX_INSN_PER_IT_BLOCK
776 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
777
778 #undef TARGET_CAN_USE_DOLOOP_P
779 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
780
781 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
782 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
783
784 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
785 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
786
787 #undef TARGET_SCHED_FUSION_PRIORITY
788 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
789
790 #undef TARGET_ASM_FUNCTION_SECTION
791 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
792
793 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
794 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
795
796 #undef TARGET_SECTION_TYPE_FLAGS
797 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
798
799 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
800 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
801
802 #undef TARGET_C_EXCESS_PRECISION
803 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
804
805 /* Although the architecture reserves bits 0 and 1, only the former is
806 used for ARM/Thumb ISA selection in v7 and earlier versions. */
807 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
808 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
809
810 #undef TARGET_FIXED_CONDITION_CODE_REGS
811 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
812
813 #undef TARGET_HARD_REGNO_NREGS
814 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
815 #undef TARGET_HARD_REGNO_MODE_OK
816 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
817
818 #undef TARGET_MODES_TIEABLE_P
819 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
820
821 #undef TARGET_CAN_CHANGE_MODE_CLASS
822 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
823
824 #undef TARGET_CONSTANT_ALIGNMENT
825 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
826
827 #undef TARGET_INVALID_WITHIN_DOLOOP
828 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
829
830 #undef TARGET_MD_ASM_ADJUST
831 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
832
833 #undef TARGET_STACK_PROTECT_GUARD
834 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
835
836 #undef TARGET_VECTORIZE_GET_MASK_MODE
837 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
838 \f
839 /* Obstack for minipool constant handling. */
840 static struct obstack minipool_obstack;
841 static char * minipool_startobj;
842
843 /* The maximum number of insns skipped which
844 will be conditionalised if possible. */
845 static int max_insns_skipped = 5;
846
847 /* True if we are currently building a constant table. */
848 int making_const_table;
849
850 /* The processor for which instructions should be scheduled. */
851 enum processor_type arm_tune = TARGET_CPU_arm_none;
852
853 /* The current tuning set. */
854 const struct tune_params *current_tune;
855
856 /* Which floating point hardware to schedule for. */
857 int arm_fpu_attr;
858
859 /* Used for Thumb call_via trampolines. */
860 rtx thumb_call_via_label[14];
861 static int thumb_call_reg_needed;
862
863 /* The bits in this mask specify which instruction scheduling options should
864 be used. */
865 unsigned int tune_flags = 0;
866
867 /* The highest ARM architecture version supported by the
868 target. */
869 enum base_architecture arm_base_arch = BASE_ARCH_0;
870
871 /* Active target architecture and tuning. */
872
873 struct arm_build_target arm_active_target;
874
875 /* The following are used in the arm.md file as equivalents to bits
876 in the above two flag variables. */
877
878 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
879 int arm_arch4 = 0;
880
881 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
882 int arm_arch4t = 0;
883
884 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
885 int arm_arch5t = 0;
886
887 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
888 int arm_arch5te = 0;
889
890 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
891 int arm_arch6 = 0;
892
893 /* Nonzero if this chip supports the ARM 6K extensions. */
894 int arm_arch6k = 0;
895
896 /* Nonzero if this chip supports the ARM 6KZ extensions. */
897 int arm_arch6kz = 0;
898
899 /* Nonzero if instructions present in ARMv6-M can be used. */
900 int arm_arch6m = 0;
901
902 /* Nonzero if this chip supports the ARM 7 extensions. */
903 int arm_arch7 = 0;
904
905 /* Nonzero if this chip supports the Large Physical Address Extension. */
906 int arm_arch_lpae = 0;
907
908 /* Nonzero if instructions not present in the 'M' profile can be used. */
909 int arm_arch_notm = 0;
910
911 /* Nonzero if instructions present in ARMv7E-M can be used. */
912 int arm_arch7em = 0;
913
914 /* Nonzero if instructions present in ARMv8 can be used. */
915 int arm_arch8 = 0;
916
917 /* Nonzero if this chip supports the ARMv8.1 extensions. */
918 int arm_arch8_1 = 0;
919
920 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
921 int arm_arch8_2 = 0;
922
923 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
924 int arm_arch8_3 = 0;
925
926 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
927 int arm_arch8_4 = 0;
928 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
929 extensions. */
930 int arm_arch8_1m_main = 0;
931
932 /* Nonzero if this chip supports the FP16 instructions extension of ARM
933 Architecture 8.2. */
934 int arm_fp16_inst = 0;
935
936 /* Nonzero if this chip can benefit from load scheduling. */
937 int arm_ld_sched = 0;
938
939 /* Nonzero if this chip is a StrongARM. */
940 int arm_tune_strongarm = 0;
941
942 /* Nonzero if this chip supports Intel Wireless MMX technology. */
943 int arm_arch_iwmmxt = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
946 int arm_arch_iwmmxt2 = 0;
947
948 /* Nonzero if this chip is an XScale. */
949 int arm_arch_xscale = 0;
950
951 /* Nonzero if tuning for XScale. */
952 int arm_tune_xscale = 0;
953
954 /* Nonzero if we want to tune for stores that access the write-buffer.
955 This typically means an ARM6 or ARM7 with MMU or MPU. */
956 int arm_tune_wbuf = 0;
957
958 /* Nonzero if tuning for Cortex-A9. */
959 int arm_tune_cortex_a9 = 0;
960
961 /* Nonzero if we should define __THUMB_INTERWORK__ in the
962 preprocessor.
963 XXX This is a bit of a hack; it's intended to help work around
964 problems in GLD, which doesn't understand that armv5t code is
965 interworking clean. */
966 int arm_cpp_interwork = 0;
967
968 /* Nonzero if chip supports Thumb 1. */
969 int arm_arch_thumb1;
970
971 /* Nonzero if chip supports Thumb 2. */
972 int arm_arch_thumb2;
973
974 /* Nonzero if chip supports integer division instruction. */
975 int arm_arch_arm_hwdiv;
976 int arm_arch_thumb_hwdiv;
977
978 /* Nonzero if chip disallows volatile memory access in IT block. */
979 int arm_arch_no_volatile_ce;
980
981 /* Nonzero if we shouldn't use literal pools. */
982 bool arm_disable_literal_pool = false;
983
984 /* The register number to be used for the PIC offset register. */
985 unsigned arm_pic_register = INVALID_REGNUM;
986
987 enum arm_pcs arm_pcs_default;
988
989 /* For an explanation of these variables, see final_prescan_insn below. */
990 int arm_ccfsm_state;
991 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
992 enum arm_cond_code arm_current_cc;
993
994 rtx arm_target_insn;
995 int arm_target_label;
996 /* The number of conditionally executed insns, including the current insn. */
997 int arm_condexec_count = 0;
998 /* A bitmask specifying the patterns for the IT block.
999 Zero means do not output an IT block before this insn. */
1000 int arm_condexec_mask = 0;
1001 /* The number of bits used in arm_condexec_mask. */
1002 int arm_condexec_masklen = 0;
1003
1004 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1005 int arm_arch_crc = 0;
1006
1007 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1008 int arm_arch_dotprod = 0;
1009
1010 /* Nonzero if chip supports the ARMv8-M security extensions. */
1011 int arm_arch_cmse = 0;
1012
1013 /* Nonzero if the core has a very small, high-latency multiply unit. */
1014 int arm_m_profile_small_mul = 0;
1015
1016 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1017 int arm_arch_i8mm = 0;
1018
1019 /* Nonzero if chip supports the BFloat16 instructions. */
1020 int arm_arch_bf16 = 0;
1021
1022 /* Nonzero if chip supports the Custom Datapath Extension. */
1023 int arm_arch_cde = 0;
1024 int arm_arch_cde_coproc = 0;
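/* Bit masks for the eight possible CDE coprocessors, indexed by
   coprocessor number.  */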
1025 const int arm_arch_cde_coproc_bits[] = {
1026 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1027 };
1028
1029 /* The condition codes of the ARM, and the inverse function. */
1030 static const char * const arm_condition_codes[] =
1031 {
1032 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1033 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1034 };
1035
1036 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1037 int arm_regs_in_sequence[] =
1038 {
1039 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1040 };
1041
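/* Expand the FP_SYSREGS list into a table of FP system register name
   strings, one entry per DEF_FP_SYSREG.  */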
1042 #define DEF_FP_SYSREG(reg) #reg,
1043 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1044 FP_SYSREGS
1045 };
1046 #undef DEF_FP_SYSREG
1047
1048 #define ARM_LSL_NAME "lsl"
1049 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1050
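/* Low registers (r0-r7) that can be used as work registers in Thumb-2
   code, excluding the hard frame pointer, SP, PC and (when in use) the
   PIC register.  */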
1051 #define THUMB2_WORK_REGS \
1052 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1053 | (1 << SP_REGNUM) \
1054 | (1 << PC_REGNUM) \
1055 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1056 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1057 : 0)))
1058 \f
1059 /* Initialization code. */
1060
1061 struct cpu_tune
1062 {
1063 enum processor_type scheduler;
1064 unsigned int tune_flags;
1065 const struct tune_params *tune;
1066 };
1067
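/* Initializers for the prefetch tuning parameters: number of prefetch
   slots, L1 cache size and L1 cache line size; -1 means the value is
   unknown or that prefetching is not beneficial.  */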
1068 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1069 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1070 { \
1071 num_slots, \
1072 l1_size, \
1073 l1_line_size \
1074 }
1075
1076 /* arm generic vectorizer costs. */
1077 static const
1078 struct cpu_vec_costs arm_default_vec_cost = {
1079 1, /* scalar_stmt_cost. */
1080 1, /* scalar load_cost. */
1081 1, /* scalar_store_cost. */
1082 1, /* vec_stmt_cost. */
1083 1, /* vec_to_scalar_cost. */
1084 1, /* scalar_to_vec_cost. */
1085 1, /* vec_align_load_cost. */
1086 1, /* vec_unalign_load_cost. */
1087 1, /* vec_unalign_store_cost. */
1088 1, /* vec_store_cost. */
1089 3, /* cond_taken_branch_cost. */
1090 1, /* cond_not_taken_branch_cost. */
1091 };
1092
1093 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1094 #include "aarch-cost-tables.h"
1095
1096
1097
1098 const struct cpu_cost_table cortexa9_extra_costs =
1099 {
1100 /* ALU */
1101 {
1102 0, /* arith. */
1103 0, /* logical. */
1104 0, /* shift. */
1105 COSTS_N_INSNS (1), /* shift_reg. */
1106 COSTS_N_INSNS (1), /* arith_shift. */
1107 COSTS_N_INSNS (2), /* arith_shift_reg. */
1108 0, /* log_shift. */
1109 COSTS_N_INSNS (1), /* log_shift_reg. */
1110 COSTS_N_INSNS (1), /* extend. */
1111 COSTS_N_INSNS (2), /* extend_arith. */
1112 COSTS_N_INSNS (1), /* bfi. */
1113 COSTS_N_INSNS (1), /* bfx. */
1114 0, /* clz. */
1115 0, /* rev. */
1116 0, /* non_exec. */
1117 true /* non_exec_costs_exec. */
1118 },
1119 {
1120 /* MULT SImode */
1121 {
1122 COSTS_N_INSNS (3), /* simple. */
1123 COSTS_N_INSNS (3), /* flag_setting. */
1124 COSTS_N_INSNS (2), /* extend. */
1125 COSTS_N_INSNS (3), /* add. */
1126 COSTS_N_INSNS (2), /* extend_add. */
1127 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1128 },
1129 /* MULT DImode */
1130 {
1131 0, /* simple (N/A). */
1132 0, /* flag_setting (N/A). */
1133 COSTS_N_INSNS (4), /* extend. */
1134 0, /* add (N/A). */
1135 COSTS_N_INSNS (4), /* extend_add. */
1136 0 /* idiv (N/A). */
1137 }
1138 },
1139 /* LD/ST */
1140 {
1141 COSTS_N_INSNS (2), /* load. */
1142 COSTS_N_INSNS (2), /* load_sign_extend. */
1143 COSTS_N_INSNS (2), /* ldrd. */
1144 COSTS_N_INSNS (2), /* ldm_1st. */
1145 1, /* ldm_regs_per_insn_1st. */
1146 2, /* ldm_regs_per_insn_subsequent. */
1147 COSTS_N_INSNS (5), /* loadf. */
1148 COSTS_N_INSNS (5), /* loadd. */
1149 COSTS_N_INSNS (1), /* load_unaligned. */
1150 COSTS_N_INSNS (2), /* store. */
1151 COSTS_N_INSNS (2), /* strd. */
1152 COSTS_N_INSNS (2), /* stm_1st. */
1153 1, /* stm_regs_per_insn_1st. */
1154 2, /* stm_regs_per_insn_subsequent. */
1155 COSTS_N_INSNS (1), /* storef. */
1156 COSTS_N_INSNS (1), /* stored. */
1157 COSTS_N_INSNS (1), /* store_unaligned. */
1158 COSTS_N_INSNS (1), /* loadv. */
1159 COSTS_N_INSNS (1) /* storev. */
1160 },
1161 {
1162 /* FP SFmode */
1163 {
1164 COSTS_N_INSNS (14), /* div. */
1165 COSTS_N_INSNS (4), /* mult. */
1166 COSTS_N_INSNS (7), /* mult_addsub. */
1167 COSTS_N_INSNS (30), /* fma. */
1168 COSTS_N_INSNS (3), /* addsub. */
1169 COSTS_N_INSNS (1), /* fpconst. */
1170 COSTS_N_INSNS (1), /* neg. */
1171 COSTS_N_INSNS (3), /* compare. */
1172 COSTS_N_INSNS (3), /* widen. */
1173 COSTS_N_INSNS (3), /* narrow. */
1174 COSTS_N_INSNS (3), /* toint. */
1175 COSTS_N_INSNS (3), /* fromint. */
1176 COSTS_N_INSNS (3) /* roundint. */
1177 },
1178 /* FP DFmode */
1179 {
1180 COSTS_N_INSNS (24), /* div. */
1181 COSTS_N_INSNS (5), /* mult. */
1182 COSTS_N_INSNS (8), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 }
1194 },
1195 /* Vector */
1196 {
1197 COSTS_N_INSNS (1), /* alu. */
1198 COSTS_N_INSNS (4), /* mult. */
1199 COSTS_N_INSNS (1), /* movi. */
1200 COSTS_N_INSNS (2), /* dup. */
1201 COSTS_N_INSNS (2) /* extract. */
1202 }
1203 };
1204
1205 const struct cpu_cost_table cortexa8_extra_costs =
1206 {
1207 /* ALU */
1208 {
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 0, /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 0, /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 0, /* log_shift_reg. */
1217 0, /* extend. */
1218 0, /* extend_arith. */
1219 0, /* bfi. */
1220 0, /* bfx. */
1221 0, /* clz. */
1222 0, /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1225 },
1226 {
1227 /* MULT SImode */
1228 {
1229 COSTS_N_INSNS (1), /* simple. */
1230 COSTS_N_INSNS (1), /* flag_setting. */
1231 COSTS_N_INSNS (1), /* extend. */
1232 COSTS_N_INSNS (1), /* add. */
1233 COSTS_N_INSNS (1), /* extend_add. */
1234 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1235 },
1236 /* MULT DImode */
1237 {
1238 0, /* simple (N/A). */
1239 0, /* flag_setting (N/A). */
1240 COSTS_N_INSNS (2), /* extend. */
1241 0, /* add (N/A). */
1242 COSTS_N_INSNS (2), /* extend_add. */
1243 0 /* idiv (N/A). */
1244 }
1245 },
1246 /* LD/ST */
1247 {
1248 COSTS_N_INSNS (1), /* load. */
1249 COSTS_N_INSNS (1), /* load_sign_extend. */
1250 COSTS_N_INSNS (1), /* ldrd. */
1251 COSTS_N_INSNS (1), /* ldm_1st. */
1252 1, /* ldm_regs_per_insn_1st. */
1253 2, /* ldm_regs_per_insn_subsequent. */
1254 COSTS_N_INSNS (1), /* loadf. */
1255 COSTS_N_INSNS (1), /* loadd. */
1256 COSTS_N_INSNS (1), /* load_unaligned. */
1257 COSTS_N_INSNS (1), /* store. */
1258 COSTS_N_INSNS (1), /* strd. */
1259 COSTS_N_INSNS (1), /* stm_1st. */
1260 1, /* stm_regs_per_insn_1st. */
1261 2, /* stm_regs_per_insn_subsequent. */
1262 COSTS_N_INSNS (1), /* storef. */
1263 COSTS_N_INSNS (1), /* stored. */
1264 COSTS_N_INSNS (1), /* store_unaligned. */
1265 COSTS_N_INSNS (1), /* loadv. */
1266 COSTS_N_INSNS (1) /* storev. */
1267 },
1268 {
1269 /* FP SFmode */
1270 {
1271 COSTS_N_INSNS (36), /* div. */
1272 COSTS_N_INSNS (11), /* mult. */
1273 COSTS_N_INSNS (20), /* mult_addsub. */
1274 COSTS_N_INSNS (30), /* fma. */
1275 COSTS_N_INSNS (9), /* addsub. */
1276 COSTS_N_INSNS (3), /* fpconst. */
1277 COSTS_N_INSNS (3), /* neg. */
1278 COSTS_N_INSNS (6), /* compare. */
1279 COSTS_N_INSNS (4), /* widen. */
1280 COSTS_N_INSNS (4), /* narrow. */
1281 COSTS_N_INSNS (8), /* toint. */
1282 COSTS_N_INSNS (8), /* fromint. */
1283 COSTS_N_INSNS (8) /* roundint. */
1284 },
1285 /* FP DFmode */
1286 {
1287 COSTS_N_INSNS (64), /* div. */
1288 COSTS_N_INSNS (16), /* mult. */
1289 COSTS_N_INSNS (25), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (6), /* widen. */
1296 COSTS_N_INSNS (6), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 }
1301 },
1302 /* Vector */
1303 {
1304 COSTS_N_INSNS (1), /* alu. */
1305 COSTS_N_INSNS (4), /* mult. */
1306 COSTS_N_INSNS (1), /* movi. */
1307 COSTS_N_INSNS (2), /* dup. */
1308 COSTS_N_INSNS (2) /* extract. */
1309 }
1310 };
1311
1312 const struct cpu_cost_table cortexa5_extra_costs =
1313 {
1314 /* ALU */
1315 {
1316 0, /* arith. */
1317 0, /* logical. */
1318 COSTS_N_INSNS (1), /* shift. */
1319 COSTS_N_INSNS (1), /* shift_reg. */
1320 COSTS_N_INSNS (1), /* arith_shift. */
1321 COSTS_N_INSNS (1), /* arith_shift_reg. */
1322 COSTS_N_INSNS (1), /* log_shift. */
1323 COSTS_N_INSNS (1), /* log_shift_reg. */
1324 COSTS_N_INSNS (1), /* extend. */
1325 COSTS_N_INSNS (1), /* extend_arith. */
1326 COSTS_N_INSNS (1), /* bfi. */
1327 COSTS_N_INSNS (1), /* bfx. */
1328 COSTS_N_INSNS (1), /* clz. */
1329 COSTS_N_INSNS (1), /* rev. */
1330 0, /* non_exec. */
1331 true /* non_exec_costs_exec. */
1332 },
1333
1334 {
1335 /* MULT SImode */
1336 {
1337 0, /* simple. */
1338 COSTS_N_INSNS (1), /* flag_setting. */
1339 COSTS_N_INSNS (1), /* extend. */
1340 COSTS_N_INSNS (1), /* add. */
1341 COSTS_N_INSNS (1), /* extend_add. */
1342 COSTS_N_INSNS (7) /* idiv. */
1343 },
1344 /* MULT DImode */
1345 {
1346 0, /* simple (N/A). */
1347 0, /* flag_setting (N/A). */
1348 COSTS_N_INSNS (1), /* extend. */
1349 0, /* add. */
1350 COSTS_N_INSNS (2), /* extend_add. */
1351 0 /* idiv (N/A). */
1352 }
1353 },
1354 /* LD/ST */
1355 {
1356 COSTS_N_INSNS (1), /* load. */
1357 COSTS_N_INSNS (1), /* load_sign_extend. */
1358 COSTS_N_INSNS (6), /* ldrd. */
1359 COSTS_N_INSNS (1), /* ldm_1st. */
1360 1, /* ldm_regs_per_insn_1st. */
1361 2, /* ldm_regs_per_insn_subsequent. */
1362 COSTS_N_INSNS (2), /* loadf. */
1363 COSTS_N_INSNS (4), /* loadd. */
1364 COSTS_N_INSNS (1), /* load_unaligned. */
1365 COSTS_N_INSNS (1), /* store. */
1366 COSTS_N_INSNS (3), /* strd. */
1367 COSTS_N_INSNS (1), /* stm_1st. */
1368 1, /* stm_regs_per_insn_1st. */
1369 2, /* stm_regs_per_insn_subsequent. */
1370 COSTS_N_INSNS (2), /* storef. */
1371 COSTS_N_INSNS (2), /* stored. */
1372 COSTS_N_INSNS (1), /* store_unaligned. */
1373 COSTS_N_INSNS (1), /* loadv. */
1374 COSTS_N_INSNS (1) /* storev. */
1375 },
1376 {
1377 /* FP SFmode */
1378 {
1379 COSTS_N_INSNS (15), /* div. */
1380 COSTS_N_INSNS (3), /* mult. */
1381 COSTS_N_INSNS (7), /* mult_addsub. */
1382 COSTS_N_INSNS (7), /* fma. */
1383 COSTS_N_INSNS (3), /* addsub. */
1384 COSTS_N_INSNS (3), /* fpconst. */
1385 COSTS_N_INSNS (3), /* neg. */
1386 COSTS_N_INSNS (3), /* compare. */
1387 COSTS_N_INSNS (3), /* widen. */
1388 COSTS_N_INSNS (3), /* narrow. */
1389 COSTS_N_INSNS (3), /* toint. */
1390 COSTS_N_INSNS (3), /* fromint. */
1391 COSTS_N_INSNS (3) /* roundint. */
1392 },
1393 /* FP DFmode */
1394 {
1395 COSTS_N_INSNS (30), /* div. */
1396 COSTS_N_INSNS (6), /* mult. */
1397 COSTS_N_INSNS (10), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 }
1409 },
1410 /* Vector */
1411 {
1412 COSTS_N_INSNS (1), /* alu. */
1413 COSTS_N_INSNS (4), /* mult. */
1414 COSTS_N_INSNS (1), /* movi. */
1415 COSTS_N_INSNS (2), /* dup. */
1416 COSTS_N_INSNS (2) /* extract. */
1417 }
1418 };
1419
1420
1421 const struct cpu_cost_table cortexa7_extra_costs =
1422 {
1423 /* ALU */
1424 {
1425 0, /* arith. */
1426 0, /* logical. */
1427 COSTS_N_INSNS (1), /* shift. */
1428 COSTS_N_INSNS (1), /* shift_reg. */
1429 COSTS_N_INSNS (1), /* arith_shift. */
1430 COSTS_N_INSNS (1), /* arith_shift_reg. */
1431 COSTS_N_INSNS (1), /* log_shift. */
1432 COSTS_N_INSNS (1), /* log_shift_reg. */
1433 COSTS_N_INSNS (1), /* extend. */
1434 COSTS_N_INSNS (1), /* extend_arith. */
1435 COSTS_N_INSNS (1), /* bfi. */
1436 COSTS_N_INSNS (1), /* bfx. */
1437 COSTS_N_INSNS (1), /* clz. */
1438 COSTS_N_INSNS (1), /* rev. */
1439 0, /* non_exec. */
1440 true /* non_exec_costs_exec. */
1441 },
1442
1443 {
1444 /* MULT SImode */
1445 {
1446 0, /* simple. */
1447 COSTS_N_INSNS (1), /* flag_setting. */
1448 COSTS_N_INSNS (1), /* extend. */
1449 COSTS_N_INSNS (1), /* add. */
1450 COSTS_N_INSNS (1), /* extend_add. */
1451 COSTS_N_INSNS (7) /* idiv. */
1452 },
1453 /* MULT DImode */
1454 {
1455 0, /* simple (N/A). */
1456 0, /* flag_setting (N/A). */
1457 COSTS_N_INSNS (1), /* extend. */
1458 0, /* add. */
1459 COSTS_N_INSNS (2), /* extend_add. */
1460 0 /* idiv (N/A). */
1461 }
1462 },
1463 /* LD/ST */
1464 {
1465 COSTS_N_INSNS (1), /* load. */
1466 COSTS_N_INSNS (1), /* load_sign_extend. */
1467 COSTS_N_INSNS (3), /* ldrd. */
1468 COSTS_N_INSNS (1), /* ldm_1st. */
1469 1, /* ldm_regs_per_insn_1st. */
1470 2, /* ldm_regs_per_insn_subsequent. */
1471 COSTS_N_INSNS (2), /* loadf. */
1472 COSTS_N_INSNS (2), /* loadd. */
1473 COSTS_N_INSNS (1), /* load_unaligned. */
1474 COSTS_N_INSNS (1), /* store. */
1475 COSTS_N_INSNS (3), /* strd. */
1476 COSTS_N_INSNS (1), /* stm_1st. */
1477 1, /* stm_regs_per_insn_1st. */
1478 2, /* stm_regs_per_insn_subsequent. */
1479 COSTS_N_INSNS (2), /* storef. */
1480 COSTS_N_INSNS (2), /* stored. */
1481 COSTS_N_INSNS (1), /* store_unaligned. */
1482 COSTS_N_INSNS (1), /* loadv. */
1483 COSTS_N_INSNS (1) /* storev. */
1484 },
1485 {
1486 /* FP SFmode */
1487 {
1488 COSTS_N_INSNS (15), /* div. */
1489 COSTS_N_INSNS (3), /* mult. */
1490 COSTS_N_INSNS (7), /* mult_addsub. */
1491 COSTS_N_INSNS (7), /* fma. */
1492 COSTS_N_INSNS (3), /* addsub. */
1493 COSTS_N_INSNS (3), /* fpconst. */
1494 COSTS_N_INSNS (3), /* neg. */
1495 COSTS_N_INSNS (3), /* compare. */
1496 COSTS_N_INSNS (3), /* widen. */
1497 COSTS_N_INSNS (3), /* narrow. */
1498 COSTS_N_INSNS (3), /* toint. */
1499 COSTS_N_INSNS (3), /* fromint. */
1500 COSTS_N_INSNS (3) /* roundint. */
1501 },
1502 /* FP DFmode */
1503 {
1504 COSTS_N_INSNS (30), /* div. */
1505 COSTS_N_INSNS (6), /* mult. */
1506 COSTS_N_INSNS (10), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 }
1518 },
1519 /* Vector */
1520 {
1521 COSTS_N_INSNS (1), /* alu. */
1522 COSTS_N_INSNS (4), /* mult. */
1523 COSTS_N_INSNS (1), /* movi. */
1524 COSTS_N_INSNS (2), /* dup. */
1525 COSTS_N_INSNS (2) /* extract. */
1526 }
1527 };
1528
1529 const struct cpu_cost_table cortexa12_extra_costs =
1530 {
1531 /* ALU */
1532 {
1533 0, /* arith. */
1534 0, /* logical. */
1535 0, /* shift. */
1536 COSTS_N_INSNS (1), /* shift_reg. */
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1541 0, /* extend. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 0, /* bfi. */
1544 COSTS_N_INSNS (1), /* bfx. */
1545 COSTS_N_INSNS (1), /* clz. */
1546 COSTS_N_INSNS (1), /* rev. */
1547 0, /* non_exec. */
1548 true /* non_exec_costs_exec. */
1549 },
1550 /* MULT SImode */
1551 {
1552 {
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (3), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1559 },
1560 /* MULT DImode */
1561 {
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1565 0, /* add (N/A). */
1566 COSTS_N_INSNS (3), /* extend_add. */
1567 0 /* idiv (N/A). */
1568 }
1569 },
1570 /* LD/ST */
1571 {
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (3), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (3), /* loadf. */
1579 COSTS_N_INSNS (3), /* loadd. */
1580 0, /* load_unaligned. */
1581 0, /* store. */
1582 0, /* strd. */
1583 0, /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1586 COSTS_N_INSNS (2), /* storef. */
1587 COSTS_N_INSNS (2), /* stored. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1591 },
1592 {
1593 /* FP SFmode */
1594 {
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (2), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1608 },
1609 /* FP DFmode */
1610 {
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 }
1625 },
1626 /* Vector */
1627 {
1628 COSTS_N_INSNS (1), /* alu. */
1629 COSTS_N_INSNS (4), /* mult. */
1630 COSTS_N_INSNS (1), /* movi. */
1631 COSTS_N_INSNS (2), /* dup. */
1632 COSTS_N_INSNS (2) /* extract. */
1633 }
1634 };
1635
1636 const struct cpu_cost_table cortexa15_extra_costs =
1637 {
1638 /* ALU */
1639 {
1640 0, /* arith. */
1641 0, /* logical. */
1642 0, /* shift. */
1643 0, /* shift_reg. */
1644 COSTS_N_INSNS (1), /* arith_shift. */
1645 COSTS_N_INSNS (1), /* arith_shift_reg. */
1646 COSTS_N_INSNS (1), /* log_shift. */
1647 COSTS_N_INSNS (1), /* log_shift_reg. */
1648 0, /* extend. */
1649 COSTS_N_INSNS (1), /* extend_arith. */
1650 COSTS_N_INSNS (1), /* bfi. */
1651 0, /* bfx. */
1652 0, /* clz. */
1653 0, /* rev. */
1654 0, /* non_exec. */
1655 true /* non_exec_costs_exec. */
1656 },
1657 /* MULT SImode */
1658 {
1659 {
1660 COSTS_N_INSNS (2), /* simple. */
1661 COSTS_N_INSNS (3), /* flag_setting. */
1662 COSTS_N_INSNS (2), /* extend. */
1663 COSTS_N_INSNS (2), /* add. */
1664 COSTS_N_INSNS (2), /* extend_add. */
1665 COSTS_N_INSNS (18) /* idiv. */
1666 },
1667 /* MULT DImode */
1668 {
1669 0, /* simple (N/A). */
1670 0, /* flag_setting (N/A). */
1671 COSTS_N_INSNS (3), /* extend. */
1672 0, /* add (N/A). */
1673 COSTS_N_INSNS (3), /* extend_add. */
1674 0 /* idiv (N/A). */
1675 }
1676 },
1677 /* LD/ST */
1678 {
1679 COSTS_N_INSNS (3), /* load. */
1680 COSTS_N_INSNS (3), /* load_sign_extend. */
1681 COSTS_N_INSNS (3), /* ldrd. */
1682 COSTS_N_INSNS (4), /* ldm_1st. */
1683 1, /* ldm_regs_per_insn_1st. */
1684 2, /* ldm_regs_per_insn_subsequent. */
1685 COSTS_N_INSNS (4), /* loadf. */
1686 COSTS_N_INSNS (4), /* loadd. */
1687 0, /* load_unaligned. */
1688 0, /* store. */
1689 0, /* strd. */
1690 COSTS_N_INSNS (1), /* stm_1st. */
1691 1, /* stm_regs_per_insn_1st. */
1692 2, /* stm_regs_per_insn_subsequent. */
1693 0, /* storef. */
1694 0, /* stored. */
1695 0, /* store_unaligned. */
1696 COSTS_N_INSNS (1), /* loadv. */
1697 COSTS_N_INSNS (1) /* storev. */
1698 },
1699 {
1700 /* FP SFmode */
1701 {
1702 COSTS_N_INSNS (17), /* div. */
1703 COSTS_N_INSNS (4), /* mult. */
1704 COSTS_N_INSNS (8), /* mult_addsub. */
1705 COSTS_N_INSNS (8), /* fma. */
1706 COSTS_N_INSNS (4), /* addsub. */
1707 COSTS_N_INSNS (2), /* fpconst. */
1708 COSTS_N_INSNS (2), /* neg. */
1709 COSTS_N_INSNS (5), /* compare. */
1710 COSTS_N_INSNS (4), /* widen. */
1711 COSTS_N_INSNS (4), /* narrow. */
1712 COSTS_N_INSNS (4), /* toint. */
1713 COSTS_N_INSNS (4), /* fromint. */
1714 COSTS_N_INSNS (4) /* roundint. */
1715 },
1716 /* FP DFmode */
1717 {
1718 COSTS_N_INSNS (31), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (2), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 }
1732 },
1733 /* Vector */
1734 {
1735 COSTS_N_INSNS (1), /* alu. */
1736 COSTS_N_INSNS (4), /* mult. */
1737 COSTS_N_INSNS (1), /* movi. */
1738 COSTS_N_INSNS (2), /* dup. */
1739 COSTS_N_INSNS (2) /* extract. */
1740 }
1741 };
1742
1743 const struct cpu_cost_table v7m_extra_costs =
1744 {
1745 /* ALU */
1746 {
1747 0, /* arith. */
1748 0, /* logical. */
1749 0, /* shift. */
1750 0, /* shift_reg. */
1751 0, /* arith_shift. */
1752 COSTS_N_INSNS (1), /* arith_shift_reg. */
1753 0, /* log_shift. */
1754 COSTS_N_INSNS (1), /* log_shift_reg. */
1755 0, /* extend. */
1756 COSTS_N_INSNS (1), /* extend_arith. */
1757 0, /* bfi. */
1758 0, /* bfx. */
1759 0, /* clz. */
1760 0, /* rev. */
1761 COSTS_N_INSNS (1), /* non_exec. */
1762 false /* non_exec_costs_exec. */
1763 },
1764 {
1765 /* MULT SImode */
1766 {
1767 COSTS_N_INSNS (1), /* simple. */
1768 COSTS_N_INSNS (1), /* flag_setting. */
1769 COSTS_N_INSNS (2), /* extend. */
1770 COSTS_N_INSNS (1), /* add. */
1771 COSTS_N_INSNS (3), /* extend_add. */
1772 COSTS_N_INSNS (8) /* idiv. */
1773 },
1774 /* MULT DImode */
1775 {
1776 0, /* simple (N/A). */
1777 0, /* flag_setting (N/A). */
1778 COSTS_N_INSNS (2), /* extend. */
1779 0, /* add (N/A). */
1780 COSTS_N_INSNS (3), /* extend_add. */
1781 0 /* idiv (N/A). */
1782 }
1783 },
1784 /* LD/ST */
1785 {
1786 COSTS_N_INSNS (2), /* load. */
1787 0, /* load_sign_extend. */
1788 COSTS_N_INSNS (3), /* ldrd. */
1789 COSTS_N_INSNS (2), /* ldm_1st. */
1790 1, /* ldm_regs_per_insn_1st. */
1791 1, /* ldm_regs_per_insn_subsequent. */
1792 COSTS_N_INSNS (2), /* loadf. */
1793 COSTS_N_INSNS (3), /* loadd. */
1794 COSTS_N_INSNS (1), /* load_unaligned. */
1795 COSTS_N_INSNS (2), /* store. */
1796 COSTS_N_INSNS (3), /* strd. */
1797 COSTS_N_INSNS (2), /* stm_1st. */
1798 1, /* stm_regs_per_insn_1st. */
1799 1, /* stm_regs_per_insn_subsequent. */
1800 COSTS_N_INSNS (2), /* storef. */
1801 COSTS_N_INSNS (3), /* stored. */
1802 COSTS_N_INSNS (1), /* store_unaligned. */
1803 COSTS_N_INSNS (1), /* loadv. */
1804 COSTS_N_INSNS (1) /* storev. */
1805 },
1806 {
1807 /* FP SFmode */
1808 {
1809 COSTS_N_INSNS (7), /* div. */
1810 COSTS_N_INSNS (2), /* mult. */
1811 COSTS_N_INSNS (5), /* mult_addsub. */
1812 COSTS_N_INSNS (3), /* fma. */
1813 COSTS_N_INSNS (1), /* addsub. */
1814 0, /* fpconst. */
1815 0, /* neg. */
1816 0, /* compare. */
1817 0, /* widen. */
1818 0, /* narrow. */
1819 0, /* toint. */
1820 0, /* fromint. */
1821 0 /* roundint. */
1822 },
1823 /* FP DFmode */
1824 {
1825 COSTS_N_INSNS (15), /* div. */
1826 COSTS_N_INSNS (5), /* mult. */
1827 COSTS_N_INSNS (7), /* mult_addsub. */
1828 COSTS_N_INSNS (7), /* fma. */
1829 COSTS_N_INSNS (3), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 }
1839 },
1840 /* Vector */
1841 {
1842 COSTS_N_INSNS (1), /* alu. */
1843 COSTS_N_INSNS (4), /* mult. */
1844 COSTS_N_INSNS (1), /* movi. */
1845 COSTS_N_INSNS (2), /* dup. */
1846 COSTS_N_INSNS (2) /* extract. */
1847 }
1848 };
1849
1850 const struct addr_mode_cost_table generic_addr_mode_costs =
1851 {
1852 /* int. */
1853 {
1854 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1855 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1856 COSTS_N_INSNS (0) /* AMO_WB. */
1857 },
1858 /* float. */
1859 {
1860 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1861 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1862 COSTS_N_INSNS (0) /* AMO_WB. */
1863 },
1864 /* vector. */
1865 {
1866 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1867 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1868 COSTS_N_INSNS (0) /* AMO_WB. */
1869 }
1870 };
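/* A note on units: COSTS_N_INSNS (N) is defined in rtl.h as N * 4, so
   COSTS_N_INSNS (1) == 4.  Broadly, the "extra cost" tables above record cost
   over and above the baseline cost of a single instruction, which is why the
   common cheap operations are simply 0.  */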
1871
1872 const struct tune_params arm_slowmul_tune =
1873 {
1874 &generic_extra_costs, /* Insn extra costs. */
1875 &generic_addr_mode_costs, /* Addressing mode costs. */
1876 NULL, /* Sched adj cost. */
1877 arm_default_branch_cost,
1878 &arm_default_vec_cost,
1879 3, /* Constant limit. */
1880 5, /* Max cond insns. */
1881 8, /* Memset max inline. */
1882 1, /* Issue rate. */
1883 ARM_PREFETCH_NOT_BENEFICIAL,
1884 tune_params::PREF_CONST_POOL_TRUE,
1885 tune_params::PREF_LDRD_FALSE,
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1887 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1888 tune_params::DISPARAGE_FLAGS_NEITHER,
1889 tune_params::PREF_NEON_STRINGOPS_FALSE,
1890 tune_params::FUSE_NOTHING,
1891 tune_params::SCHED_AUTOPREF_OFF
1892 };
1893
1894 const struct tune_params arm_fastmul_tune =
1895 {
1896 &generic_extra_costs, /* Insn extra costs. */
1897 &generic_addr_mode_costs, /* Addressing mode costs. */
1898 NULL, /* Sched adj cost. */
1899 arm_default_branch_cost,
1900 &arm_default_vec_cost,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 1, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 tune_params::PREF_CONST_POOL_TRUE,
1907 tune_params::PREF_LDRD_FALSE,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE,
1912 tune_params::FUSE_NOTHING,
1913 tune_params::SCHED_AUTOPREF_OFF
1914 };
1915
1916 /* StrongARM has early execution of branches, so a sequence that is worth
1917 skipping is shorter. Set max_insns_skipped to a lower value. */
1918
1919 const struct tune_params arm_strongarm_tune =
1920 {
1921 &generic_extra_costs, /* Insn extra costs. */
1922 &generic_addr_mode_costs, /* Addressing mode costs. */
1923 NULL, /* Sched adj cost. */
1924 arm_default_branch_cost,
1925 &arm_default_vec_cost,
1926 1, /* Constant limit. */
1927 3, /* Max cond insns. */
1928 8, /* Memset max inline. */
1929 1, /* Issue rate. */
1930 ARM_PREFETCH_NOT_BENEFICIAL,
1931 tune_params::PREF_CONST_POOL_TRUE,
1932 tune_params::PREF_LDRD_FALSE,
1933 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1935 tune_params::DISPARAGE_FLAGS_NEITHER,
1936 tune_params::PREF_NEON_STRINGOPS_FALSE,
1937 tune_params::FUSE_NOTHING,
1938 tune_params::SCHED_AUTOPREF_OFF
1939 };
1940
1941 const struct tune_params arm_xscale_tune =
1942 {
1943 &generic_extra_costs, /* Insn extra costs. */
1944 &generic_addr_mode_costs, /* Addressing mode costs. */
1945 xscale_sched_adjust_cost,
1946 arm_default_branch_cost,
1947 &arm_default_vec_cost,
1948 2, /* Constant limit. */
1949 3, /* Max cond insns. */
1950 8, /* Memset max inline. */
1951 1, /* Issue rate. */
1952 ARM_PREFETCH_NOT_BENEFICIAL,
1953 tune_params::PREF_CONST_POOL_TRUE,
1954 tune_params::PREF_LDRD_FALSE,
1955 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1956 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1957 tune_params::DISPARAGE_FLAGS_NEITHER,
1958 tune_params::PREF_NEON_STRINGOPS_FALSE,
1959 tune_params::FUSE_NOTHING,
1960 tune_params::SCHED_AUTOPREF_OFF
1961 };
1962
1963 const struct tune_params arm_9e_tune =
1964 {
1965 &generic_extra_costs, /* Insn extra costs. */
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 1, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_TRUE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_STRINGOPS_FALSE,
1981 tune_params::FUSE_NOTHING,
1982 tune_params::SCHED_AUTOPREF_OFF
1983 };
1984
1985 const struct tune_params arm_marvell_pj4_tune =
1986 {
1987 &generic_extra_costs, /* Insn extra costs. */
1988 &generic_addr_mode_costs, /* Addressing mode costs. */
1989 NULL, /* Sched adj cost. */
1990 arm_default_branch_cost,
1991 &arm_default_vec_cost,
1992 1, /* Constant limit. */
1993 5, /* Max cond insns. */
1994 8, /* Memset max inline. */
1995 2, /* Issue rate. */
1996 ARM_PREFETCH_NOT_BENEFICIAL,
1997 tune_params::PREF_CONST_POOL_TRUE,
1998 tune_params::PREF_LDRD_FALSE,
1999 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2001 tune_params::DISPARAGE_FLAGS_NEITHER,
2002 tune_params::PREF_NEON_STRINGOPS_FALSE,
2003 tune_params::FUSE_NOTHING,
2004 tune_params::SCHED_AUTOPREF_OFF
2005 };
2006
2007 const struct tune_params arm_v6t2_tune =
2008 {
2009 &generic_extra_costs, /* Insn extra costs. */
2010 &generic_addr_mode_costs, /* Addressing mode costs. */
2011 NULL, /* Sched adj cost. */
2012 arm_default_branch_cost,
2013 &arm_default_vec_cost,
2014 1, /* Constant limit. */
2015 5, /* Max cond insns. */
2016 8, /* Memset max inline. */
2017 1, /* Issue rate. */
2018 ARM_PREFETCH_NOT_BENEFICIAL,
2019 tune_params::PREF_CONST_POOL_FALSE,
2020 tune_params::PREF_LDRD_FALSE,
2021 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2023 tune_params::DISPARAGE_FLAGS_NEITHER,
2024 tune_params::PREF_NEON_STRINGOPS_FALSE,
2025 tune_params::FUSE_NOTHING,
2026 tune_params::SCHED_AUTOPREF_OFF
2027 };
2028
2029
2030 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2031 const struct tune_params arm_cortex_tune =
2032 {
2033 &generic_extra_costs,
2034 &generic_addr_mode_costs, /* Addressing mode costs. */
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_STRINGOPS_FALSE,
2049 tune_params::FUSE_NOTHING,
2050 tune_params::SCHED_AUTOPREF_OFF
2051 };
2052
2053 const struct tune_params arm_cortex_a8_tune =
2054 {
2055 &cortexa8_extra_costs,
2056 &generic_addr_mode_costs, /* Addressing mode costs. */
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 5, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 2, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_FALSE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_NEITHER,
2070 tune_params::PREF_NEON_STRINGOPS_TRUE,
2071 tune_params::FUSE_NOTHING,
2072 tune_params::SCHED_AUTOPREF_OFF
2073 };
2074
2075 const struct tune_params arm_cortex_a7_tune =
2076 {
2077 &cortexa7_extra_costs,
2078 &generic_addr_mode_costs, /* Addressing mode costs. */
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 5, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 2, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_FALSE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_NEITHER,
2092 tune_params::PREF_NEON_STRINGOPS_TRUE,
2093 tune_params::FUSE_NOTHING,
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 const struct tune_params arm_cortex_a15_tune =
2098 {
2099 &cortexa15_extra_costs,
2100 &generic_addr_mode_costs, /* Addressing mode costs. */
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 8, /* Memset max inline. */
2107 3, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_STRINGOPS_TRUE,
2115 tune_params::FUSE_NOTHING,
2116 tune_params::SCHED_AUTOPREF_FULL
2117 };
2118
2119 const struct tune_params arm_cortex_a35_tune =
2120 {
2121 &cortexa53_extra_costs,
2122 &generic_addr_mode_costs, /* Addressing mode costs. */
2123 NULL, /* Sched adj cost. */
2124 arm_default_branch_cost,
2125 &arm_default_vec_cost,
2126 1, /* Constant limit. */
2127 5, /* Max cond insns. */
2128 8, /* Memset max inline. */
2129 1, /* Issue rate. */
2130 ARM_PREFETCH_NOT_BENEFICIAL,
2131 tune_params::PREF_CONST_POOL_FALSE,
2132 tune_params::PREF_LDRD_FALSE,
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2134 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2135 tune_params::DISPARAGE_FLAGS_NEITHER,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_cortex_a53_tune =
2142 {
2143 &cortexa53_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 8, /* Memset max inline. */
2151 2, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_FALSE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_NEITHER,
2158 tune_params::PREF_NEON_STRINGOPS_TRUE,
2159 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2160 tune_params::SCHED_AUTOPREF_OFF
2161 };
2162
2163 const struct tune_params arm_cortex_a57_tune =
2164 {
2165 &cortexa57_extra_costs,
2166 &generic_addr_mode_costs, /* Addressing mode costs. */
2167 NULL, /* Sched adj cost. */
2168 arm_default_branch_cost,
2169 &arm_default_vec_cost,
2170 1, /* Constant limit. */
2171 2, /* Max cond insns. */
2172 8, /* Memset max inline. */
2173 3, /* Issue rate. */
2174 ARM_PREFETCH_NOT_BENEFICIAL,
2175 tune_params::PREF_CONST_POOL_FALSE,
2176 tune_params::PREF_LDRD_TRUE,
2177 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2178 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2179 tune_params::DISPARAGE_FLAGS_ALL,
2180 tune_params::PREF_NEON_STRINGOPS_TRUE,
2181 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2182 tune_params::SCHED_AUTOPREF_FULL
2183 };
2184
2185 const struct tune_params arm_exynosm1_tune =
2186 {
2187 &exynosm1_extra_costs,
2188 &generic_addr_mode_costs, /* Addressing mode costs. */
2189 NULL, /* Sched adj cost. */
2190 arm_default_branch_cost,
2191 &arm_default_vec_cost,
2192 1, /* Constant limit. */
2193 2, /* Max cond insns. */
2194 8, /* Memset max inline. */
2195 3, /* Issue rate. */
2196 ARM_PREFETCH_NOT_BENEFICIAL,
2197 tune_params::PREF_CONST_POOL_FALSE,
2198 tune_params::PREF_LDRD_TRUE,
2199 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2200 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2201 tune_params::DISPARAGE_FLAGS_ALL,
2202 tune_params::PREF_NEON_STRINGOPS_TRUE,
2203 tune_params::FUSE_NOTHING,
2204 tune_params::SCHED_AUTOPREF_OFF
2205 };
2206
2207 const struct tune_params arm_xgene1_tune =
2208 {
2209 &xgene1_extra_costs,
2210 &generic_addr_mode_costs, /* Addressing mode costs. */
2211 NULL, /* Sched adj cost. */
2212 arm_default_branch_cost,
2213 &arm_default_vec_cost,
2214 1, /* Constant limit. */
2215 2, /* Max cond insns. */
2216 32, /* Memset max inline. */
2217 4, /* Issue rate. */
2218 ARM_PREFETCH_NOT_BENEFICIAL,
2219 tune_params::PREF_CONST_POOL_FALSE,
2220 tune_params::PREF_LDRD_TRUE,
2221 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2223 tune_params::DISPARAGE_FLAGS_ALL,
2224 tune_params::PREF_NEON_STRINGOPS_FALSE,
2225 tune_params::FUSE_NOTHING,
2226 tune_params::SCHED_AUTOPREF_OFF
2227 };
2228
2229 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2230 less appealing. Set max_insns_skipped to a low value. */
2231
2232 const struct tune_params arm_cortex_a5_tune =
2233 {
2234 &cortexa5_extra_costs,
2235 &generic_addr_mode_costs, /* Addressing mode costs. */
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_a5_branch_cost,
2238 &arm_default_vec_cost,
2239 1, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_FALSE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_STRINGOPS_TRUE,
2250 tune_params::FUSE_NOTHING,
2251 tune_params::SCHED_AUTOPREF_OFF
2252 };
2253
2254 const struct tune_params arm_cortex_a9_tune =
2255 {
2256 &cortexa9_extra_costs,
2257 &generic_addr_mode_costs, /* Addressing mode costs. */
2258 cortex_a9_sched_adjust_cost,
2259 arm_default_branch_cost,
2260 &arm_default_vec_cost,
2261 1, /* Constant limit. */
2262 5, /* Max cond insns. */
2263 8, /* Memset max inline. */
2264 2, /* Issue rate. */
2265 ARM_PREFETCH_BENEFICIAL(4,32,32),
2266 tune_params::PREF_CONST_POOL_FALSE,
2267 tune_params::PREF_LDRD_FALSE,
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2269 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2270 tune_params::DISPARAGE_FLAGS_NEITHER,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE,
2272 tune_params::FUSE_NOTHING,
2273 tune_params::SCHED_AUTOPREF_OFF
2274 };
2275
2276 const struct tune_params arm_cortex_a12_tune =
2277 {
2278 &cortexa12_extra_costs,
2279 &generic_addr_mode_costs, /* Addressing mode costs. */
2280 NULL, /* Sched adj cost. */
2281 arm_default_branch_cost,
2282 &arm_default_vec_cost, /* Vectorizer costs. */
2283 1, /* Constant limit. */
2284 2, /* Max cond insns. */
2285 8, /* Memset max inline. */
2286 2, /* Issue rate. */
2287 ARM_PREFETCH_NOT_BENEFICIAL,
2288 tune_params::PREF_CONST_POOL_FALSE,
2289 tune_params::PREF_LDRD_TRUE,
2290 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2291 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2292 tune_params::DISPARAGE_FLAGS_ALL,
2293 tune_params::PREF_NEON_STRINGOPS_TRUE,
2294 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2295 tune_params::SCHED_AUTOPREF_OFF
2296 };
2297
2298 const struct tune_params arm_cortex_a73_tune =
2299 {
2300 &cortexa57_extra_costs,
2301 &generic_addr_mode_costs, /* Addressing mode costs. */
2302 NULL, /* Sched adj cost. */
2303 arm_default_branch_cost,
2304 &arm_default_vec_cost, /* Vectorizer costs. */
2305 1, /* Constant limit. */
2306 2, /* Max cond insns. */
2307 8, /* Memset max inline. */
2308 2, /* Issue rate. */
2309 ARM_PREFETCH_NOT_BENEFICIAL,
2310 tune_params::PREF_CONST_POOL_FALSE,
2311 tune_params::PREF_LDRD_TRUE,
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2313 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2314 tune_params::DISPARAGE_FLAGS_ALL,
2315 tune_params::PREF_NEON_STRINGOPS_TRUE,
2316 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2317 tune_params::SCHED_AUTOPREF_FULL
2318 };
2319
2320 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2321 single cycle, so materialising a 32-bit constant that way costs two cycles.
2322 An LDR from the constant pool likewise takes two cycles, but mildly
2323 increases pipelining opportunity (consecutive loads/stores can be pipelined
2324 together, saving one cycle), and may also improve icache utilisation.
2325 Hence we prefer the constant pool for such processors. */
2326
2327 const struct tune_params arm_v7m_tune =
2328 {
2329 &v7m_extra_costs,
2330 &generic_addr_mode_costs, /* Addressing mode costs. */
2331 NULL, /* Sched adj cost. */
2332 arm_cortex_m_branch_cost,
2333 &arm_default_vec_cost,
2334 1, /* Constant limit. */
2335 2, /* Max cond insns. */
2336 8, /* Memset max inline. */
2337 1, /* Issue rate. */
2338 ARM_PREFETCH_NOT_BENEFICIAL,
2339 tune_params::PREF_CONST_POOL_TRUE,
2340 tune_params::PREF_LDRD_FALSE,
2341 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2342 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2343 tune_params::DISPARAGE_FLAGS_NEITHER,
2344 tune_params::PREF_NEON_STRINGOPS_FALSE,
2345 tune_params::FUSE_NOTHING,
2346 tune_params::SCHED_AUTOPREF_OFF
2347 };
2348
2349 /* Cortex-M7 tuning. */
2350
2351 const struct tune_params arm_cortex_m7_tune =
2352 {
2353 &v7m_extra_costs,
2354 &generic_addr_mode_costs, /* Addressing mode costs. */
2355 NULL, /* Sched adj cost. */
2356 arm_cortex_m7_branch_cost,
2357 &arm_default_vec_cost,
2358 0, /* Constant limit. */
2359 1, /* Max cond insns. */
2360 8, /* Memset max inline. */
2361 2, /* Issue rate. */
2362 ARM_PREFETCH_NOT_BENEFICIAL,
2363 tune_params::PREF_CONST_POOL_TRUE,
2364 tune_params::PREF_LDRD_FALSE,
2365 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2366 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2367 tune_params::DISPARAGE_FLAGS_NEITHER,
2368 tune_params::PREF_NEON_STRINGOPS_FALSE,
2369 tune_params::FUSE_NOTHING,
2370 tune_params::SCHED_AUTOPREF_OFF
2371 };
2372
2373 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2374 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2375 cortex-m23. */
2376 const struct tune_params arm_v6m_tune =
2377 {
2378 &generic_extra_costs, /* Insn extra costs. */
2379 &generic_addr_mode_costs, /* Addressing mode costs. */
2380 NULL, /* Sched adj cost. */
2381 arm_default_branch_cost,
2382 &arm_default_vec_cost, /* Vectorizer costs. */
2383 1, /* Constant limit. */
2384 5, /* Max cond insns. */
2385 8, /* Memset max inline. */
2386 1, /* Issue rate. */
2387 ARM_PREFETCH_NOT_BENEFICIAL,
2388 tune_params::PREF_CONST_POOL_FALSE,
2389 tune_params::PREF_LDRD_FALSE,
2390 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2391 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2392 tune_params::DISPARAGE_FLAGS_NEITHER,
2393 tune_params::PREF_NEON_STRINGOPS_FALSE,
2394 tune_params::FUSE_NOTHING,
2395 tune_params::SCHED_AUTOPREF_OFF
2396 };
2397
2398 const struct tune_params arm_fa726te_tune =
2399 {
2400 &generic_extra_costs, /* Insn extra costs. */
2401 &generic_addr_mode_costs, /* Addressing mode costs. */
2402 fa726te_sched_adjust_cost,
2403 arm_default_branch_cost,
2404 &arm_default_vec_cost,
2405 1, /* Constant limit. */
2406 5, /* Max cond insns. */
2407 8, /* Memset max inline. */
2408 2, /* Issue rate. */
2409 ARM_PREFETCH_NOT_BENEFICIAL,
2410 tune_params::PREF_CONST_POOL_TRUE,
2411 tune_params::PREF_LDRD_FALSE,
2412 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2413 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2414 tune_params::DISPARAGE_FLAGS_NEITHER,
2415 tune_params::PREF_NEON_STRINGOPS_FALSE,
2416 tune_params::FUSE_NOTHING,
2417 tune_params::SCHED_AUTOPREF_OFF
2418 };
2419
2420 /* Auto-generated CPU, FPU and architecture tables. */
2421 #include "arm-cpu-data.h"
2422
2423 /* The name of the preprocessor macro to define for this architecture. PROFILE
2424 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2425 is thus chosen to be big enough to hold the longest architecture name. */
2426
2427 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2428
2429 /* Supported TLS relocations. */
2430
2431 enum tls_reloc {
2432 TLS_GD32,
2433 TLS_GD32_FDPIC,
2434 TLS_LDM32,
2435 TLS_LDM32_FDPIC,
2436 TLS_LDO32,
2437 TLS_IE32,
2438 TLS_IE32_FDPIC,
2439 TLS_LE32,
2440 TLS_DESCSEQ /* GNU scheme */
2441 };
2442
2443 /* The maximum number of insns to be used when loading a constant. */
2444 inline static int
2445 arm_constant_limit (bool size_p)
2446 {
2447 return size_p ? 1 : current_tune->constant_limit;
2448 }
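/* For example, when optimizing for size the limit is always a single insn;
   otherwise it comes from the tuning tables above (e.g. 3 for
   arm_slowmul_tune, 2 for arm_xscale_tune, 1 for most Cortex tunings).  */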
2449
2450 /* Emit an insn that's a simple single-set. Both the operands must be known
2451 to be valid. */
2452 inline static rtx_insn *
2453 emit_set_insn (rtx x, rtx y)
2454 {
2455 return emit_insn (gen_rtx_SET (x, y));
2456 }
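/* A typical use is to adjust a register by a constant in a single SET, e.g.
   emit_set_insn (stack_pointer_rtx,
		  plus_constant (Pmode, stack_pointer_rtx, -16));
   which emits (set (reg sp) (plus (reg sp) (const_int -16))).  */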
2457
2458 /* Return the number of bits set in VALUE. */
2459 static unsigned
2460 bit_count (unsigned long value)
2461 {
2462 unsigned long count = 0;
2463
2464 while (value)
2465 {
2466 count++;
2467 value &= value - 1; /* Clear the least-significant set bit. */
2468 }
2469
2470 return count;
2471 }
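/* For example, bit_count (0x29) clears one set bit per iteration:
   0x29 -> 0x28 -> 0x20 -> 0, so it returns 3.  */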
2472
2473 /* Return the number of bits set in BMAP. */
2474 static unsigned
2475 bitmap_popcount (const sbitmap bmap)
2476 {
2477 unsigned int count = 0;
2478 unsigned int n = 0;
2479 sbitmap_iterator sbi;
2480
2481 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2482 count++;
2483 return count;
2484 }
2485
2486 typedef struct
2487 {
2488 machine_mode mode;
2489 const char *name;
2490 } arm_fixed_mode_set;
2491
2492 /* A small helper for setting fixed-point libfuncs. */
2493
2494 static void
2495 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2496 const char *funcname, const char *modename,
2497 int num_suffix)
2498 {
2499 char buffer[50];
2500
2501 if (num_suffix == 0)
2502 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2503 else
2504 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2505
2506 set_optab_libfunc (optable, mode, buffer);
2507 }
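/* For example, arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3)
   registers "__gnu_addsq3" as the SQmode addition libcall; a NUM_SUFFIX of 0
   omits the trailing digit.  */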
2508
2509 static void
2510 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2511 machine_mode from, const char *funcname,
2512 const char *toname, const char *fromname)
2513 {
2514 char buffer[50];
2515 const char *maybe_suffix_2 = "";
2516
2517 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2518 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2519 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2520 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2521 maybe_suffix_2 = "2";
2522
2523 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2524 maybe_suffix_2);
2525
2526 set_conv_libfunc (optable, to, from, buffer);
2527 }
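/* For example, a conversion from QQmode to HQmode (both signed fract modes)
   registers "__gnu_fractqqhq2", whereas a conversion from SQmode to DFmode
   registers "__gnu_fractsqdf": the "2" suffix is added only when both modes
   are fixed-point with the same signedness and the same fract/accum class.  */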
2528
2529 static GTY(()) rtx speculation_barrier_libfunc;
2530
2531 /* Record that we have no arithmetic or comparison libfuncs for
2532 machine mode MODE. */
2533
2534 static void
2535 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2536 {
2537 /* Arithmetic. */
2538 set_optab_libfunc (add_optab, mode, NULL);
2539 set_optab_libfunc (sdiv_optab, mode, NULL);
2540 set_optab_libfunc (smul_optab, mode, NULL);
2541 set_optab_libfunc (neg_optab, mode, NULL);
2542 set_optab_libfunc (sub_optab, mode, NULL);
2543
2544 /* Comparisons. */
2545 set_optab_libfunc (eq_optab, mode, NULL);
2546 set_optab_libfunc (ne_optab, mode, NULL);
2547 set_optab_libfunc (lt_optab, mode, NULL);
2548 set_optab_libfunc (le_optab, mode, NULL);
2549 set_optab_libfunc (ge_optab, mode, NULL);
2550 set_optab_libfunc (gt_optab, mode, NULL);
2551 set_optab_libfunc (unord_optab, mode, NULL);
2552 }
2553
2554 /* Set up library functions unique to ARM. */
2555 static void
2556 arm_init_libfuncs (void)
2557 {
2558 machine_mode mode_iter;
2559
2560 /* For Linux, we have access to kernel support for atomic operations. */
2561 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2562 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2563
2564 /* There are no special library functions unless we are using the
2565 ARM BPABI. */
2566 if (!TARGET_BPABI)
2567 return;
2568
2569 /* The functions below are described in Section 4 of the "Run-Time
2570 ABI for the ARM architecture", Version 1.0. */
2571
2572 /* Double-precision floating-point arithmetic. Table 2. */
2573 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2574 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2575 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2576 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2577 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2578
2579 /* Double-precision comparisons. Table 3. */
2580 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2581 set_optab_libfunc (ne_optab, DFmode, NULL);
2582 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2583 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2584 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2585 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2586 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2587
2588 /* Single-precision floating-point arithmetic. Table 4. */
2589 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2590 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2591 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2592 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2593 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2594
2595 /* Single-precision comparisons. Table 5. */
2596 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2597 set_optab_libfunc (ne_optab, SFmode, NULL);
2598 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2599 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2600 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2601 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2602 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2603
2604 /* Floating-point to integer conversions. Table 6. */
2605 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2606 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2607 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2608 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2609 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2610 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2611 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2612 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2613
2614 /* Conversions between floating types. Table 7. */
2615 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2616 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2617
2618 /* Integer to floating-point conversions. Table 8. */
2619 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2620 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2621 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2622 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2623 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2624 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2625 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2626 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2627
2628 /* Long long. Table 9. */
2629 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2630 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2631 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2632 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2633 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2634 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2635 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2636 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2637
2638 /* Integer (32/32->32) division. \S 4.3.1. */
2639 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2640 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2641
2642 /* The divmod functions are designed so that they can be used for
2643 plain division, even though they return both the quotient and the
2644 remainder. The quotient is returned in the usual location (i.e.,
2645 r0 for SImode, {r0, r1} for DImode), just as would be expected
2646 for an ordinary division routine. Because the AAPCS calling
2647 conventions specify that all of { r0, r1, r2, r3 } are
2648 call-clobbered registers, the compiler already assumes that any
2649 libcall may clobber them, so there is no need to state explicitly
2650 that these routines clobber those registers. */
2651 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2652 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2653
2654 /* For SImode division the ABI provides div-without-mod routines,
2655 which are faster. */
2656 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2657 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2658
2659 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2660 divmod libcalls instead. */
2661 set_optab_libfunc (smod_optab, DImode, NULL);
2662 set_optab_libfunc (umod_optab, DImode, NULL);
2663 set_optab_libfunc (smod_optab, SImode, NULL);
2664 set_optab_libfunc (umod_optab, SImode, NULL);
2665
2666 /* Half-precision float operations. The compiler handles all operations
2667 with NULL libfuncs by converting to SFmode. */
2668 switch (arm_fp16_format)
2669 {
2670 case ARM_FP16_FORMAT_IEEE:
2671 case ARM_FP16_FORMAT_ALTERNATIVE:
2672
2673 /* Conversions. */
2674 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2675 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2676 ? "__gnu_f2h_ieee"
2677 : "__gnu_f2h_alternative"));
2678 set_conv_libfunc (sext_optab, SFmode, HFmode,
2679 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2680 ? "__gnu_h2f_ieee"
2681 : "__gnu_h2f_alternative"));
2682
2683 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2684 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2685 ? "__gnu_d2h_ieee"
2686 : "__gnu_d2h_alternative"));
2687
2688 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2689 break;
2690
2691 default:
2692 break;
2693 }
2694
2695 /* For all possible libcalls in BFmode, record NULL. */
2696 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2697 {
2698 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2699 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2700 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2701 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2702 }
2703 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2704
2705 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2706 {
2707 const arm_fixed_mode_set fixed_arith_modes[] =
2708 {
2709 { E_QQmode, "qq" },
2710 { E_UQQmode, "uqq" },
2711 { E_HQmode, "hq" },
2712 { E_UHQmode, "uhq" },
2713 { E_SQmode, "sq" },
2714 { E_USQmode, "usq" },
2715 { E_DQmode, "dq" },
2716 { E_UDQmode, "udq" },
2717 { E_TQmode, "tq" },
2718 { E_UTQmode, "utq" },
2719 { E_HAmode, "ha" },
2720 { E_UHAmode, "uha" },
2721 { E_SAmode, "sa" },
2722 { E_USAmode, "usa" },
2723 { E_DAmode, "da" },
2724 { E_UDAmode, "uda" },
2725 { E_TAmode, "ta" },
2726 { E_UTAmode, "uta" }
2727 };
2728 const arm_fixed_mode_set fixed_conv_modes[] =
2729 {
2730 { E_QQmode, "qq" },
2731 { E_UQQmode, "uqq" },
2732 { E_HQmode, "hq" },
2733 { E_UHQmode, "uhq" },
2734 { E_SQmode, "sq" },
2735 { E_USQmode, "usq" },
2736 { E_DQmode, "dq" },
2737 { E_UDQmode, "udq" },
2738 { E_TQmode, "tq" },
2739 { E_UTQmode, "utq" },
2740 { E_HAmode, "ha" },
2741 { E_UHAmode, "uha" },
2742 { E_SAmode, "sa" },
2743 { E_USAmode, "usa" },
2744 { E_DAmode, "da" },
2745 { E_UDAmode, "uda" },
2746 { E_TAmode, "ta" },
2747 { E_UTAmode, "uta" },
2748 { E_QImode, "qi" },
2749 { E_HImode, "hi" },
2750 { E_SImode, "si" },
2751 { E_DImode, "di" },
2752 { E_TImode, "ti" },
2753 { E_SFmode, "sf" },
2754 { E_DFmode, "df" }
2755 };
2756 unsigned int i, j;
2757
2758 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2759 {
2760 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2761 "add", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2763 "ssadd", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2765 "usadd", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2767 "sub", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2769 "sssub", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2771 "ussub", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2773 "mul", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2775 "ssmul", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2777 "usmul", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2779 "div", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2781 "udiv", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2783 "ssdiv", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2785 "usdiv", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2787 "neg", fixed_arith_modes[i].name, 2);
2788 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2789 "ssneg", fixed_arith_modes[i].name, 2);
2790 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2791 "usneg", fixed_arith_modes[i].name, 2);
2792 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2793 "ashl", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2795 "ashr", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2797 "lshr", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2799 "ssashl", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2801 "usashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2803 "cmp", fixed_arith_modes[i].name, 2);
2804 }
2805
2806 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2807 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2808 {
2809 if (i == j
2810 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2811 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2812 continue;
2813
2814 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2815 fixed_conv_modes[j].mode, "fract",
2816 fixed_conv_modes[i].name,
2817 fixed_conv_modes[j].name);
2818 arm_set_fixed_conv_libfunc (satfract_optab,
2819 fixed_conv_modes[i].mode,
2820 fixed_conv_modes[j].mode, "satfract",
2821 fixed_conv_modes[i].name,
2822 fixed_conv_modes[j].name);
2823 arm_set_fixed_conv_libfunc (fractuns_optab,
2824 fixed_conv_modes[i].mode,
2825 fixed_conv_modes[j].mode, "fractuns",
2826 fixed_conv_modes[i].name,
2827 fixed_conv_modes[j].name);
2828 arm_set_fixed_conv_libfunc (satfractuns_optab,
2829 fixed_conv_modes[i].mode,
2830 fixed_conv_modes[j].mode, "satfractuns",
2831 fixed_conv_modes[i].name,
2832 fixed_conv_modes[j].name);
2833 }
2834 }
2835
2836 if (TARGET_AAPCS_BASED)
2837 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2838
2839 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2840 }
2841
2842 /* On AAPCS systems, this is the "struct __va_list". */
2843 static GTY(()) tree va_list_type;
2844
2845 /* Return the type to use as __builtin_va_list. */
2846 static tree
2847 arm_build_builtin_va_list (void)
2848 {
2849 tree va_list_name;
2850 tree ap_field;
2851
2852 if (!TARGET_AAPCS_BASED)
2853 return std_build_builtin_va_list ();
2854
2855 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2856 defined as:
2857
2858 struct __va_list
2859 {
2860 void *__ap;
2861 };
2862
2863 The C Library ABI further reinforces this definition in \S
2864 4.1.
2865
2866 We must follow this definition exactly. The structure tag
2867 name is visible in C++ mangled names, and thus forms a part
2868 of the ABI. The field name may be used by people who
2869 #include <stdarg.h>. */
2870 /* Create the type. */
2871 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2872 /* Give it the required name. */
2873 va_list_name = build_decl (BUILTINS_LOCATION,
2874 TYPE_DECL,
2875 get_identifier ("__va_list"),
2876 va_list_type);
2877 DECL_ARTIFICIAL (va_list_name) = 1;
2878 TYPE_NAME (va_list_type) = va_list_name;
2879 TYPE_STUB_DECL (va_list_type) = va_list_name;
2880 /* Create the __ap field. */
2881 ap_field = build_decl (BUILTINS_LOCATION,
2882 FIELD_DECL,
2883 get_identifier ("__ap"),
2884 ptr_type_node);
2885 DECL_ARTIFICIAL (ap_field) = 1;
2886 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2887 TYPE_FIELDS (va_list_type) = ap_field;
2888 /* Compute its layout. */
2889 layout_type (va_list_type);
2890
2891 return va_list_type;
2892 }
2893
2894 /* Return an expression of type "void *" pointing to the next
2895 available argument in a variable-argument list. VALIST is the
2896 user-level va_list object, of type __builtin_va_list. */
2897 static tree
2898 arm_extract_valist_ptr (tree valist)
2899 {
2900 if (TREE_TYPE (valist) == error_mark_node)
2901 return error_mark_node;
2902
2903 /* On an AAPCS target, the pointer is stored within "struct
2904 va_list". */
2905 if (TARGET_AAPCS_BASED)
2906 {
2907 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2908 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2909 valist, ap_field, NULL_TREE);
2910 }
2911
2912 return valist;
2913 }
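/* On an AAPCS target the result is therefore the component reference
   VALIST.__ap (of type void *); otherwise VALIST is returned unchanged.  */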
2914
2915 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2916 static void
2917 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2918 {
2919 valist = arm_extract_valist_ptr (valist);
2920 std_expand_builtin_va_start (valist, nextarg);
2921 }
2922
2923 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2924 static tree
2925 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2926 gimple_seq *post_p)
2927 {
2928 valist = arm_extract_valist_ptr (valist);
2929 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2930 }
2931
2932 /* Check any incompatible options that the user has specified. */
2933 static void
2934 arm_option_check_internal (struct gcc_options *opts)
2935 {
2936 int flags = opts->x_target_flags;
2937
2938 /* iWMMXt and NEON are incompatible. */
2939 if (TARGET_IWMMXT
2940 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2941 error ("iWMMXt and NEON are incompatible");
2942
2943 /* Make sure that the processor choice does not conflict with any of the
2944 other command line choices. */
2945 if (TARGET_ARM_P (flags)
2946 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2947 error ("target CPU does not support ARM mode");
2948
2949 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2950 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2951 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2952
2953 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2954 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2955
2956 /* If this target is normally configured to use APCS frames, warn if they
2957 are turned off and debugging is turned on. */
2958 if (TARGET_ARM_P (flags)
2959 && write_symbols != NO_DEBUG
2960 && !TARGET_APCS_FRAME
2961 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2962 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2963 "debugging");
2964
2965 /* iWMMXt unsupported under Thumb mode. */
2966 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2967 error ("iWMMXt unsupported under Thumb mode");
2968
2969 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2970 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2971
2972 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2973 {
2974 error ("RTP PIC is incompatible with Thumb");
2975 flag_pic = 0;
2976 }
2977
2978 if (target_pure_code || target_slow_flash_data)
2979 {
2980 const char *flag = (target_pure_code ? "-mpure-code" :
2981 "-mslow-flash-data");
2982 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2983
2984 /* We only support -mslow-flash-data on M-profile targets with
2985 MOVT. */
2986 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2987 error ("%s only supports non-pic code on M-profile targets with the "
2988 "MOVT instruction", flag);
2989
2990 /* We only support -mpure-code on M-profile targets. */
2991 if (target_pure_code && common_unsupported_modes)
2992 error ("%s only supports non-pic code on M-profile targets", flag);
2993
2994 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2995 -mword-relocations forbids relocation of MOVT/MOVW. */
2996 if (target_word_relocations)
2997 error ("%s incompatible with %<-mword-relocations%>", flag);
2998 }
2999 }
3000
3001 /* Recompute the global settings depending on target attribute options. */
3002
3003 static void
3004 arm_option_params_internal (void)
3005 {
3006 /* If we are not using the default (ARM mode) section anchor offset
3007 ranges, then set the correct ranges now. */
3008 if (TARGET_THUMB1)
3009 {
3010 /* Thumb-1 LDR instructions cannot have negative offsets.
3011 Permissible positive offset ranges are 5-bit (for byte loads),
3012 6-bit (for halfword loads), or 7-bit (for word loads).
3013 Empirical results suggest a 7-bit anchor range gives the best
3014 overall code size. */
3015 targetm.min_anchor_offset = 0;
3016 targetm.max_anchor_offset = 127;
3017 }
3018 else if (TARGET_THUMB2)
3019 {
3020 /* The minimum is set such that the total size of the block
3021 for a particular anchor is 248 + 1 + 4095 bytes, which is
3022 divisible by eight, ensuring natural spacing of anchors. */
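/* (248 + 1 + 4095 == 4344 == 8 * 543.)  */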
3023 targetm.min_anchor_offset = -248;
3024 targetm.max_anchor_offset = 4095;
3025 }
3026 else
3027 {
3028 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3029 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3030 }
3031
3032 /* With -Os, use a fixed limit for conditional instructions instead of the tuning default. */
3033 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3034
3035 /* For THUMB2, we limit the conditional sequence to one IT block. */
3036 if (TARGET_THUMB2)
3037 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3038
3039 if (TARGET_THUMB1)
3040 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3041 else
3042 targetm.md_asm_adjust = arm_md_asm_adjust;
3043 }
3044
3045 /* True if -mflip-thumb should next add an attribute for the default
3046 mode, false if it should next add an attribute for the opposite mode. */
3047 static GTY(()) bool thumb_flipper;
3048
3049 /* Options after initial target override. */
3050 static GTY(()) tree init_optimize;
3051
3052 static void
3053 arm_override_options_after_change_1 (struct gcc_options *opts,
3054 struct gcc_options *opts_set)
3055 {
3056 /* -falign-functions without argument: supply one. */
3057 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3058 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3059 && opts->x_optimize_size ? "2" : "4";
3060 }
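/* That is, a bare -falign-functions defaults to 2-byte alignment for Thumb
   code when optimizing for size, and to 4-byte alignment otherwise.  */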
3061
3062 /* Implement targetm.override_options_after_change. */
3063
3064 static void
3065 arm_override_options_after_change (void)
3066 {
3067 arm_override_options_after_change_1 (&global_options, &global_options_set);
3068 }
3069
3070 /* Implement TARGET_OPTION_RESTORE. */
3071 static void
3072 arm_option_restore (struct gcc_options */* opts */,
3073 struct gcc_options */* opts_set */,
3074 struct cl_target_option *ptr)
3075 {
3076 arm_configure_build_target (&arm_active_target, ptr, false);
3077 arm_option_reconfigure_globals ();
3078 }
3079
3080 /* Reset options between modes that the user has specified. */
3081 static void
3082 arm_option_override_internal (struct gcc_options *opts,
3083 struct gcc_options *opts_set)
3084 {
3085 arm_override_options_after_change_1 (opts, opts_set);
3086
3087 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3088 {
3089 /* The default is to enable interworking, so this warning message would
3090 be confusing to users who have just compiled with
3091 e.g. -march=armv4. */
3092 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3093 opts->x_target_flags &= ~MASK_INTERWORK;
3094 }
3095
3096 if (TARGET_THUMB_P (opts->x_target_flags)
3097 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3098 {
3099 warning (0, "target CPU does not support THUMB instructions");
3100 opts->x_target_flags &= ~MASK_THUMB;
3101 }
3102
3103 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3104 {
3105 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3106 opts->x_target_flags &= ~MASK_APCS_FRAME;
3107 }
3108
3109 /* Callee super interworking implies thumb interworking. Adding
3110 this to the flags here simplifies the logic elsewhere. */
3111 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3112 opts->x_target_flags |= MASK_INTERWORK;
3113
3114 /* We need to remember initial values so that combinations of options like
3115 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3116 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3117
3118 if (! opts_set->x_arm_restrict_it)
3119 opts->x_arm_restrict_it = arm_arch8;
3120
3121 /* ARM execution state and M profile don't have [restrict] IT. */
3122 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3123 opts->x_arm_restrict_it = 0;
3124
3125 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3126 if (!opts_set->x_arm_restrict_it
3127 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3128 opts->x_arm_restrict_it = 0;
3129
3130 /* Enable -munaligned-access by default for
3131 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3132 i.e. Thumb2 and ARM state only.
3133 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3134 - ARMv8 architecture-based processors.
3135
3136 Disable -munaligned-access by default for
3137 - all pre-ARMv6 architecture-based processors
3138 - ARMv6-M architecture-based processors
3139 - ARMv8-M Baseline processors. */
3140
3141 if (! opts_set->x_unaligned_access)
3142 {
3143 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3144 && arm_arch6 && (arm_arch_notm || arm_arch7));
3145 }
3146 else if (opts->x_unaligned_access == 1
3147 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3148 {
3149 warning (0, "target CPU does not support unaligned accesses");
3150 opts->x_unaligned_access = 0;
3151 }
3152
3153 /* Don't warn since it's on by default in -O2. */
3154 if (TARGET_THUMB1_P (opts->x_target_flags))
3155 opts->x_flag_schedule_insns = 0;
3156 else
3157 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3158
3159 /* Disable shrink-wrap when optimizing function for size, since it tends to
3160 generate additional returns. */
3161 if (optimize_function_for_size_p (cfun)
3162 && TARGET_THUMB2_P (opts->x_target_flags))
3163 opts->x_flag_shrink_wrap = false;
3164 else
3165 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3166
3167 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3168 - epilogue_insns - does not accurately model the corresponding insns
3169 emitted in the asm file. In particular, see the comment in thumb_exit
3170 'Find out how many of the (return) argument registers we can corrupt'.
3171 As a consequence, the epilogue may clobber registers without fipa-ra
3172 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3173 TODO: Accurately model clobbers for epilogue_insns and reenable
3174 fipa-ra. */
3175 if (TARGET_THUMB1_P (opts->x_target_flags))
3176 opts->x_flag_ipa_ra = 0;
3177 else
3178 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3179
3180 /* Thumb2 inline assembly code should always use unified syntax.
3181 This will apply to ARM and Thumb1 eventually. */
3182 if (TARGET_THUMB2_P (opts->x_target_flags))
3183 opts->x_inline_asm_unified = true;
3184
3185 if (arm_stack_protector_guard == SSP_GLOBAL
3186 && opts->x_arm_stack_protector_guard_offset_str)
3187 {
3188 error ("incompatible options %<-mstack-protector-guard=global%> and "
3189 "%<-mstack-protector-guard-offset=%s%>",
3190 arm_stack_protector_guard_offset_str);
3191 }
3192
3193 if (opts->x_arm_stack_protector_guard_offset_str)
3194 {
3195 char *end;
3196 const char *str = arm_stack_protector_guard_offset_str;
3197 errno = 0;
3198 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3199 if (!*str || *end || errno)
3200 error ("%qs is not a valid offset in %qs", str,
3201 "-mstack-protector-guard-offset=");
3202 arm_stack_protector_guard_offset = offs;
3203 }
3204
3205 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3206 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3207 #endif
3208 }
3209
3210 static sbitmap isa_all_fpubits_internal;
3211 static sbitmap isa_all_fpbits;
3212 static sbitmap isa_quirkbits;
3213
3214 /* Configure a build target TARGET from the user-specified options OPTS.
3215    If WARN_COMPATIBLE, emit a diagnostic if both a CPU and an architecture
3216    have been specified, but the two are not compatible.  */
3217 void
3218 arm_configure_build_target (struct arm_build_target *target,
3219 struct cl_target_option *opts,
3220 bool warn_compatible)
3221 {
3222 const cpu_option *arm_selected_tune = NULL;
3223 const arch_option *arm_selected_arch = NULL;
3224 const cpu_option *arm_selected_cpu = NULL;
3225 const arm_fpu_desc *arm_selected_fpu = NULL;
3226 const char *tune_opts = NULL;
3227 const char *arch_opts = NULL;
3228 const char *cpu_opts = NULL;
3229
3230 bitmap_clear (target->isa);
3231 target->core_name = NULL;
3232 target->arch_name = NULL;
3233
3234 if (opts->x_arm_arch_string)
3235 {
3236 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3237 "-march",
3238 opts->x_arm_arch_string);
3239 arch_opts = strchr (opts->x_arm_arch_string, '+');
3240 }
3241
3242 if (opts->x_arm_cpu_string)
3243 {
3244 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3245 opts->x_arm_cpu_string);
3246 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3247 arm_selected_tune = arm_selected_cpu;
3248 /* If taking the tuning from -mcpu, we don't need to rescan the
3249 options for tuning. */
3250 }
3251
3252 if (opts->x_arm_tune_string)
3253 {
3254 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3255 opts->x_arm_tune_string);
3256 tune_opts = strchr (opts->x_arm_tune_string, '+');
3257 }
3258
3259 if (arm_selected_arch)
3260 {
3261 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3262 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3263 arch_opts);
3264
3265 if (arm_selected_cpu)
3266 {
3267 auto_sbitmap cpu_isa (isa_num_bits);
3268 auto_sbitmap isa_delta (isa_num_bits);
3269
3270 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3271 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3272 cpu_opts);
3273 bitmap_xor (isa_delta, cpu_isa, target->isa);
3274 /* Ignore any bits that are quirk bits. */
3275 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3276 /* If the user (or the default configuration) has specified a
3277 specific FPU, then ignore any bits that depend on the FPU
3278 configuration. Do similarly if using the soft-float
3279 ABI. */
3280 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3281 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3282 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3283
3284 if (!bitmap_empty_p (isa_delta))
3285 {
3286 if (warn_compatible)
3287 warning (0, "switch %<-mcpu=%s%> conflicts "
3288 "with switch %<-march=%s%>",
3289 opts->x_arm_cpu_string,
3290 opts->x_arm_arch_string);
3291
3292 /* -march wins for code generation.
3293 -mcpu wins for default tuning. */
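	      /* Editorial illustration (not part of the original sources):
		 for example, "-march=armv8-a -mcpu=cortex-a5" warns that the
		 two switches conflict (Cortex-A5 implements Armv7-A), then
		 generates Armv8-A code while taking the default tuning from
		 Cortex-A5, as described above.  */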
3294 if (!arm_selected_tune)
3295 arm_selected_tune = arm_selected_cpu;
3296
3297 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3298 target->arch_name = arm_selected_arch->common.name;
3299 }
3300 else
3301 {
3302 /* Architecture and CPU are essentially the same.
3303 Prefer the CPU setting. */
3304 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3305 target->core_name = arm_selected_cpu->common.name;
3306 /* Copy the CPU's capabilities, so that we inherit the
3307 appropriate extensions and quirks. */
3308 bitmap_copy (target->isa, cpu_isa);
3309 }
3310 }
3311 else
3312 {
3313 /* Pick a CPU based on the architecture. */
3314 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3315 target->arch_name = arm_selected_arch->common.name;
3316 /* Note: target->core_name is left unset in this path. */
3317 }
3318 }
3319 else if (arm_selected_cpu)
3320 {
3321 target->core_name = arm_selected_cpu->common.name;
3322 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3324 cpu_opts);
3325 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3326 }
3327 /* If the user did not specify a processor or architecture, choose
3328 one for them. */
3329 else
3330 {
3331 const cpu_option *sel;
3332 auto_sbitmap sought_isa (isa_num_bits);
3333 bitmap_clear (sought_isa);
3334 auto_sbitmap default_isa (isa_num_bits);
3335
3336 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3337 TARGET_CPU_DEFAULT);
3338 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3339 gcc_assert (arm_selected_cpu->common.name);
3340
3341 /* RWE: All of the selection logic below (to the end of this
3342 'if' clause) looks somewhat suspect. It appears to be mostly
3343 there to support forcing thumb support when the default CPU
3344 does not have thumb (somewhat dubious in terms of what the
3345 user might be expecting). I think it should be removed once
3346 support for the pre-thumb era cores is removed. */
3347 sel = arm_selected_cpu;
3348 arm_initialize_isa (default_isa, sel->common.isa_bits);
3349 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3350 cpu_opts);
3351
3352 /* Now check to see if the user has specified any command line
3353 switches that require certain abilities from the cpu. */
3354
3355 if (TARGET_INTERWORK || TARGET_THUMB)
3356 bitmap_set_bit (sought_isa, isa_bit_thumb);
3357
3358 /* If there are such requirements and the default CPU does not
3359 satisfy them, we need to run over the complete list of
3360 cores looking for one that is satisfactory. */
3361 if (!bitmap_empty_p (sought_isa)
3362 && !bitmap_subset_p (sought_isa, default_isa))
3363 {
3364 auto_sbitmap candidate_isa (isa_num_bits);
3365 /* We're only interested in a CPU with at least the
3366 capabilities of the default CPU and the required
3367 additional features. */
3368 bitmap_ior (default_isa, default_isa, sought_isa);
3369
3370 /* Try to locate a CPU type that supports all of the abilities
3371 of the default CPU, plus the extra abilities requested by
3372 the user. */
3373 for (sel = all_cores; sel->common.name != NULL; sel++)
3374 {
3375 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3376 /* An exact match? */
3377 if (bitmap_equal_p (default_isa, candidate_isa))
3378 break;
3379 }
3380
3381 if (sel->common.name == NULL)
3382 {
3383 unsigned current_bit_count = isa_num_bits;
3384 const cpu_option *best_fit = NULL;
3385
3386 /* Ideally we would like to issue an error message here
3387 saying that it was not possible to find a CPU compatible
3388 with the default CPU, but which also supports the command
3389 line options specified by the programmer, and so they
3390 ought to use the -mcpu=<name> command line option to
3391 override the default CPU type.
3392
3393 If we cannot find a CPU that has exactly the
3394 characteristics of the default CPU and the given
3395 command line options we scan the array again looking
3396 for a best match. The best match must have at least
3397 the capabilities of the perfect match. */
3398 for (sel = all_cores; sel->common.name != NULL; sel++)
3399 {
3400 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3401
3402 if (bitmap_subset_p (default_isa, candidate_isa))
3403 {
3404 unsigned count;
3405
3406 bitmap_and_compl (candidate_isa, candidate_isa,
3407 default_isa);
3408 count = bitmap_popcount (candidate_isa);
3409
3410 if (count < current_bit_count)
3411 {
3412 best_fit = sel;
3413 current_bit_count = count;
3414 }
3415 }
3416
3417 gcc_assert (best_fit);
3418 sel = best_fit;
3419 }
3420 }
3421 arm_selected_cpu = sel;
3422 }
3423
3424 /* Now we know the CPU, we can finally initialize the target
3425 structure. */
3426 target->core_name = arm_selected_cpu->common.name;
3427 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3428 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3429 cpu_opts);
3430 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3431 }
3432
3433 gcc_assert (arm_selected_cpu);
3434 gcc_assert (arm_selected_arch);
3435
3436 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3437 {
3438 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3439 auto_sbitmap fpu_bits (isa_num_bits);
3440
3441 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3442 /* This should clear out ALL bits relating to the FPU/simd
3443 extensions, to avoid potentially invalid combinations later on
3444 that we can't match. At present we only clear out those bits
3445 that can be set by -mfpu. This should be fixed in GCC-12. */
3446 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3447 bitmap_ior (target->isa, target->isa, fpu_bits);
3448 }
3449
3450 /* If we have the soft-float ABI, clear any feature bits relating to use of
3451 floating-point operations. They'll just confuse things later on. */
3452 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3453 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3454
3455 /* There may be implied bits which we still need to enable. These are
3456 non-named features which are needed to complete other sets of features,
3457 but cannot be enabled from arm-cpus.in due to being shared between
3458 multiple fgroups. Each entry in all_implied_fbits is of the form
3459 ante -> cons, meaning that if the feature "ante" is enabled, we should
3460 implicitly enable "cons". */
3461 const struct fbit_implication *impl = all_implied_fbits;
3462 while (impl->ante)
3463 {
3464 if (bitmap_bit_p (target->isa, impl->ante))
3465 bitmap_set_bit (target->isa, impl->cons);
3466 impl++;
3467 }
3468
3469 if (!arm_selected_tune)
3470 arm_selected_tune = arm_selected_cpu;
3471 else /* Validate the features passed to -mtune. */
3472 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3473
3474 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3475
3476 /* Finish initializing the target structure. */
3477 if (!target->arch_name)
3478 target->arch_name = arm_selected_arch->common.name;
3479 target->arch_pp_name = arm_selected_arch->arch;
3480 target->base_arch = arm_selected_arch->base_arch;
3481 target->profile = arm_selected_arch->profile;
3482
3483 target->tune_flags = tune_data->tune_flags;
3484 target->tune = tune_data->tune;
3485 target->tune_core = tune_data->scheduler;
3486 }
3487
3488 /* Fix up any incompatible options that the user has specified. */
3489 static void
3490 arm_option_override (void)
3491 {
3492 static const enum isa_feature fpu_bitlist_internal[]
3493 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3494 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3495 static const enum isa_feature fp_bitlist[]
3496 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3497 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3498 cl_target_option opts;
3499
3500 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3501 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3502
3503 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3504 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3505 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3506 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3507
3508 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3509
3510 if (!OPTION_SET_P (arm_fpu_index))
3511 {
3512 bool ok;
3513 int fpu_index;
3514
3515 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3516 CL_TARGET);
3517 gcc_assert (ok);
3518 arm_fpu_index = (enum fpu_type) fpu_index;
3519 }
3520
3521 cl_target_option_save (&opts, &global_options, &global_options_set);
3522 arm_configure_build_target (&arm_active_target, &opts, true);
3523
3524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3525 SUBTARGET_OVERRIDE_OPTIONS;
3526 #endif
3527
3528 /* Initialize boolean versions of the architectural flags, for use
3529 in the arm.md file and for enabling feature flags. */
3530 arm_option_reconfigure_globals ();
3531
3532 arm_tune = arm_active_target.tune_core;
3533 tune_flags = arm_active_target.tune_flags;
3534 current_tune = arm_active_target.tune;
3535
3536 /* TBD: Dwarf info for apcs frame is not handled yet. */
3537 if (TARGET_APCS_FRAME)
3538 flag_shrink_wrap = false;
3539
3540 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3541 {
3542 warning (0, "%<-mapcs-stack-check%> incompatible with "
3543 "%<-mno-apcs-frame%>");
3544 target_flags |= MASK_APCS_FRAME;
3545 }
3546
3547 if (TARGET_POKE_FUNCTION_NAME)
3548 target_flags |= MASK_APCS_FRAME;
3549
3550 if (TARGET_APCS_REENT && flag_pic)
3551 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3552
3553 if (TARGET_APCS_REENT)
3554 warning (0, "APCS reentrant code not supported. Ignored");
3555
3556 /* Set up some tuning parameters. */
3557 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3558 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3559 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3560 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3561 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3562 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3563
3564 /* For arm2/3 there is no need to do any scheduling if we are doing
3565 software floating-point. */
3566 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3567 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3568
3569 /* Override the default structure alignment for AAPCS ABI. */
3570 if (!OPTION_SET_P (arm_structure_size_boundary))
3571 {
3572 if (TARGET_AAPCS_BASED)
3573 arm_structure_size_boundary = 8;
3574 }
3575 else
3576 {
3577 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3578
3579 if (arm_structure_size_boundary != 8
3580 && arm_structure_size_boundary != 32
3581 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3582 {
3583 if (ARM_DOUBLEWORD_ALIGN)
3584 warning (0,
3585 "structure size boundary can only be set to 8, 32 or 64");
3586 else
3587 warning (0, "structure size boundary can only be set to 8 or 32");
3588 arm_structure_size_boundary
3589 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3590 }
3591 }
3592
3593 if (TARGET_VXWORKS_RTP)
3594 {
3595 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3596 arm_pic_data_is_text_relative = 0;
3597 }
3598 else if (flag_pic
3599 && !arm_pic_data_is_text_relative
3600 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3601 /* When text & data segments don't have a fixed displacement, the
3602 intended use is with a single, read only, pic base register.
3603 Unless the user explicitly requested not to do that, set
3604 it. */
3605 target_flags |= MASK_SINGLE_PIC_BASE;
3606
3607 /* If stack checking is disabled, we can use r10 as the PIC register,
3608 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3609 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3610 {
3611 if (TARGET_VXWORKS_RTP)
3612 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3613 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3614 }
3615
3616 if (flag_pic && TARGET_VXWORKS_RTP)
3617 arm_pic_register = 9;
3618
3619 /* If in FDPIC mode then force arm_pic_register to be r9. */
3620 if (TARGET_FDPIC)
3621 {
3622 arm_pic_register = FDPIC_REGNUM;
3623 if (TARGET_THUMB1)
3624 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3625 }
3626
3627 if (arm_pic_register_string != NULL)
3628 {
3629 int pic_register = decode_reg_name (arm_pic_register_string);
3630
3631 if (!flag_pic)
3632 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3633
3634 /* Prevent the user from choosing an obviously stupid PIC register. */
3635 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3636 || pic_register == HARD_FRAME_POINTER_REGNUM
3637 || pic_register == STACK_POINTER_REGNUM
3638 || pic_register >= PC_REGNUM
3639 || (TARGET_VXWORKS_RTP
3640 && (unsigned int) pic_register != arm_pic_register))
3641 error ("unable to use %qs for PIC register", arm_pic_register_string);
3642 else
3643 arm_pic_register = pic_register;
3644 }
3645
3646 if (flag_pic)
3647 target_word_relocations = 1;
3648
3649 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3650 if (fix_cm3_ldrd == 2)
3651 {
3652 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3653 fix_cm3_ldrd = 1;
3654 else
3655 fix_cm3_ldrd = 0;
3656 }
3657
3658 /* Enable fix_vlldm by default if required. */
3659 if (fix_vlldm == 2)
3660 {
3661 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3662 fix_vlldm = 1;
3663 else
3664 fix_vlldm = 0;
3665 }
3666
3667 /* Enable fix_aes by default if required. */
3668 if (fix_aes_erratum_1742098 == 2)
3669 {
3670 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3671 fix_aes_erratum_1742098 = 1;
3672 else
3673 fix_aes_erratum_1742098 = 0;
3674 }
3675
3676 /* Hot/Cold partitioning is not currently supported, since we can't
3677 handle literal pool placement in that case. */
3678 if (flag_reorder_blocks_and_partition)
3679 {
3680 inform (input_location,
3681 "%<-freorder-blocks-and-partition%> not supported "
3682 "on this architecture");
3683 flag_reorder_blocks_and_partition = 0;
3684 flag_reorder_blocks = 1;
3685 }
3686
3687 if (flag_pic)
3688 /* Hoisting PIC address calculations more aggressively provides a small,
3689 but measurable, size reduction for PIC code. Therefore, we decrease
3690 the bar for unrestricted expression hoisting to the cost of PIC address
3691 calculation, which is 2 instructions. */
3692 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3693 param_gcse_unrestricted_cost, 2);
3694
3695 /* ARM EABI defaults to strict volatile bitfields. */
3696 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3697 && abi_version_at_least(2))
3698 flag_strict_volatile_bitfields = 1;
3699
3700   /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3701      for which we have deemed it beneficial (signified by setting
3702      prefetch.num_slots to 1 or more).  */
3703 if (flag_prefetch_loop_arrays < 0
3704 && HAVE_prefetch
3705 && optimize >= 3
3706 && current_tune->prefetch.num_slots > 0)
3707 flag_prefetch_loop_arrays = 1;
3708
3709 /* Set up parameters to be used in prefetching algorithm. Do not
3710 override the defaults unless we are tuning for a core we have
3711 researched values for. */
3712 if (current_tune->prefetch.num_slots > 0)
3713 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3714 param_simultaneous_prefetches,
3715 current_tune->prefetch.num_slots);
3716 if (current_tune->prefetch.l1_cache_line_size >= 0)
3717 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3718 param_l1_cache_line_size,
3719 current_tune->prefetch.l1_cache_line_size);
3720 if (current_tune->prefetch.l1_cache_line_size >= 0)
3721 {
3722 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3723 param_destruct_interfere_size,
3724 current_tune->prefetch.l1_cache_line_size);
3725 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3726 param_construct_interfere_size,
3727 current_tune->prefetch.l1_cache_line_size);
3728 }
3729 else
3730 {
3731 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3732 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3733 constructive? */
3734 /* More recent Cortex chips have a 64-byte cache line, but are marked
3735 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3736 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3737 param_destruct_interfere_size, 64);
3738 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3739 param_construct_interfere_size, 64);
3740 }
3741
3742 if (current_tune->prefetch.l1_cache_size >= 0)
3743 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3744 param_l1_cache_size,
3745 current_tune->prefetch.l1_cache_size);
3746
3747   /* Look through the ready list and all of the queue for instructions
3748      relevant to the L2 auto-prefetcher.  */
3749 int sched_autopref_queue_depth;
3750
3751 switch (current_tune->sched_autopref)
3752 {
3753 case tune_params::SCHED_AUTOPREF_OFF:
3754 sched_autopref_queue_depth = -1;
3755 break;
3756
3757 case tune_params::SCHED_AUTOPREF_RANK:
3758 sched_autopref_queue_depth = 0;
3759 break;
3760
3761 case tune_params::SCHED_AUTOPREF_FULL:
3762 sched_autopref_queue_depth = max_insn_queue_index + 1;
3763 break;
3764
3765 default:
3766 gcc_unreachable ();
3767 }
3768
3769 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3770 param_sched_autopref_queue_depth,
3771 sched_autopref_queue_depth);
3772
3773 /* Currently, for slow flash data, we just disable literal pools. We also
3774 disable it for pure-code. */
3775 if (target_slow_flash_data || target_pure_code)
3776 arm_disable_literal_pool = true;
3777
3778   /* Disable scheduling fusion by default unless the target is an ARMv7
3779      (or later) processor whose tuning prefers ldrd/strd.  */
3780 if (flag_schedule_fusion == 2
3781 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3782 flag_schedule_fusion = 0;
3783
3784   /* Need to remember the initial options before they are overridden.  */
3785 init_optimize = build_optimization_node (&global_options,
3786 &global_options_set);
3787
3788 arm_options_perform_arch_sanity_checks ();
3789 arm_option_override_internal (&global_options, &global_options_set);
3790 arm_option_check_internal (&global_options);
3791 arm_option_params_internal ();
3792
3793 /* Create the default target_options structure. */
3794 target_option_default_node = target_option_current_node
3795 = build_target_option_node (&global_options, &global_options_set);
3796
3797 /* Register global variables with the garbage collector. */
3798 arm_add_gc_roots ();
3799
3800 /* Init initial mode for testing. */
3801 thumb_flipper = TARGET_THUMB;
3802 }
3803
3804
3805 /* Reconfigure global status flags from the active_target.isa. */
3806 void
3807 arm_option_reconfigure_globals (void)
3808 {
3809 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3810 arm_base_arch = arm_active_target.base_arch;
3811
3812 /* Initialize boolean versions of the architectural flags, for use
3813 in the arm.md file. */
3814 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3815 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3816 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3817 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3818 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3819 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3820 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3821 arm_arch6m = arm_arch6 && !arm_arch_notm;
3822 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3823 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3824 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3825 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3826 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3827 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3828 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3829 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3830 isa_bit_armv8_1m_main);
3831 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3832 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3833 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3834 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3835 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3836 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3837 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3838 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3839 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3840 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3841 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3842 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3843
3844 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3845 if (arm_fp16_inst)
3846 {
3847 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3848 error ("selected fp16 options are incompatible");
3849 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3850 }
3851
3852 arm_arch_cde = 0;
3853 arm_arch_cde_coproc = 0;
3854 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3855 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3856 isa_bit_cdecp6, isa_bit_cdecp7};
3857 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3858 {
3859 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3860 if (cde_bit)
3861 {
3862 arm_arch_cde |= cde_bit;
3863 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3864 }
3865 }
3866
3867 /* And finally, set up some quirks. */
3868 arm_arch_no_volatile_ce
3869 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3870 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3871 isa_bit_quirk_armv6kz);
3872
3873 /* Use the cp15 method if it is available. */
3874 if (target_thread_pointer == TP_AUTO)
3875 {
3876 if (arm_arch6k && !TARGET_THUMB1)
3877 target_thread_pointer = TP_CP15;
3878 else
3879 target_thread_pointer = TP_SOFT;
3880 }
3881
3882 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3883     error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3884 }
3885
3886 /* Perform some validation of the desired architecture against the rest of
3887    the options.  */
3888 void
3889 arm_options_perform_arch_sanity_checks (void)
3890 {
3891 /* V5T code we generate is completely interworking capable, so we turn off
3892 TARGET_INTERWORK here to avoid many tests later on. */
3893
3894 /* XXX However, we must pass the right pre-processor defines to CPP
3895 or GLD can get confused. This is a hack. */
3896 if (TARGET_INTERWORK)
3897 arm_cpp_interwork = 1;
3898
3899 if (arm_arch5t)
3900 target_flags &= ~MASK_INTERWORK;
3901
3902 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3903 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3904
3905 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3906 error ("iwmmxt abi requires an iwmmxt capable cpu");
3907
3908 /* BPABI targets use linker tricks to allow interworking on cores
3909 without thumb support. */
3910 if (TARGET_INTERWORK
3911 && !TARGET_BPABI
3912 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3913 {
3914 warning (0, "target CPU does not support interworking" );
3915 target_flags &= ~MASK_INTERWORK;
3916 }
3917
3918 /* If soft-float is specified then don't use FPU. */
3919 if (TARGET_SOFT_FLOAT)
3920 arm_fpu_attr = FPU_NONE;
3921 else
3922 arm_fpu_attr = FPU_VFP;
3923
3924 if (TARGET_AAPCS_BASED)
3925 {
3926 if (TARGET_CALLER_INTERWORKING)
3927 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3928 else
3929 if (TARGET_CALLEE_INTERWORKING)
3930 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3931 }
3932
3933 /* __fp16 support currently assumes the core has ldrh. */
3934 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3935 sorry ("%<__fp16%> and no ldrh");
3936
3937 if (use_cmse && !arm_arch_cmse)
3938 error ("target CPU does not support ARMv8-M Security Extensions");
3939
3940 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3941 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3942 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3943 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3944
3945
3946 if (TARGET_AAPCS_BASED)
3947 {
3948 if (arm_abi == ARM_ABI_IWMMXT)
3949 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3950 else if (TARGET_HARD_FLOAT_ABI)
3951 {
3952 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3953 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3954 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3955 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3956 }
3957 else
3958 arm_pcs_default = ARM_PCS_AAPCS;
3959 }
3960 else
3961 {
3962 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3963 sorry ("%<-mfloat-abi=hard%> and VFP");
3964
3965 if (arm_abi == ARM_ABI_APCS)
3966 arm_pcs_default = ARM_PCS_APCS;
3967 else
3968 arm_pcs_default = ARM_PCS_ATPCS;
3969 }
3970 }
3971
3972 /* Test whether a local function descriptor is canonical, i.e.,
3973 whether we can use GOTOFFFUNCDESC to compute the address of the
3974 function. */
3975 static bool
3976 arm_fdpic_local_funcdesc_p (rtx fnx)
3977 {
3978 tree fn;
3979 enum symbol_visibility vis;
3980 bool ret;
3981
3982 if (!TARGET_FDPIC)
3983 return true;
3984
3985 if (! SYMBOL_REF_LOCAL_P (fnx))
3986 return false;
3987
3988 fn = SYMBOL_REF_DECL (fnx);
3989
3990 if (! fn)
3991 return false;
3992
3993 vis = DECL_VISIBILITY (fn);
3994
3995 if (vis == VISIBILITY_PROTECTED)
3996 /* Private function descriptors for protected functions are not
3997 canonical. Temporarily change the visibility to global so that
3998 we can ensure uniqueness of funcdesc pointers. */
3999 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4000
4001 ret = default_binds_local_p_1 (fn, flag_pic);
4002
4003 DECL_VISIBILITY (fn) = vis;
4004
4005 return ret;
4006 }
4007
4008 static void
4009 arm_add_gc_roots (void)
4010 {
4011 gcc_obstack_init(&minipool_obstack);
4012 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4013 }
4014 \f
4015 /* A table of known ARM exception types.
4016 For use with the interrupt function attribute. */
4017
4018 typedef struct
4019 {
4020 const char *const arg;
4021 const unsigned long return_value;
4022 }
4023 isr_attribute_arg;
4024
4025 static const isr_attribute_arg isr_attribute_args [] =
4026 {
4027 { "IRQ", ARM_FT_ISR },
4028 { "irq", ARM_FT_ISR },
4029 { "FIQ", ARM_FT_FIQ },
4030 { "fiq", ARM_FT_FIQ },
4031 { "ABORT", ARM_FT_ISR },
4032 { "abort", ARM_FT_ISR },
4033 { "UNDEF", ARM_FT_EXCEPTION },
4034 { "undef", ARM_FT_EXCEPTION },
4035 { "SWI", ARM_FT_EXCEPTION },
4036 { "swi", ARM_FT_EXCEPTION },
4037 { NULL, ARM_FT_NORMAL }
4038 };
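/* Editorial example (not part of the original sources): a handler written as

	void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   has its "IRQ" argument looked up in the table above by arm_isr_value
   below, yielding ARM_FT_ISR and hence the IRQ prologue/epilogue handling;
   an unrecognized string yields ARM_FT_UNKNOWN.  */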
4039
4040 /* Returns the (interrupt) function type of the current
4041 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4042
4043 static unsigned long
4044 arm_isr_value (tree argument)
4045 {
4046 const isr_attribute_arg * ptr;
4047 const char * arg;
4048
4049 if (!arm_arch_notm)
4050 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4051
4052 /* No argument - default to IRQ. */
4053 if (argument == NULL_TREE)
4054 return ARM_FT_ISR;
4055
4056 /* Get the value of the argument. */
4057 if (TREE_VALUE (argument) == NULL_TREE
4058 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4059 return ARM_FT_UNKNOWN;
4060
4061 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4062
4063 /* Check it against the list of known arguments. */
4064 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4065 if (streq (arg, ptr->arg))
4066 return ptr->return_value;
4067
4068 /* An unrecognized interrupt type. */
4069 return ARM_FT_UNKNOWN;
4070 }
4071
4072 /* Computes the type of the current function. */
4073
4074 static unsigned long
4075 arm_compute_func_type (void)
4076 {
4077 unsigned long type = ARM_FT_UNKNOWN;
4078 tree a;
4079 tree attr;
4080
4081 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4082
4083 /* Decide if the current function is volatile. Such functions
4084 never return, and many memory cycles can be saved by not storing
4085 register values that will never be needed again. This optimization
4086 was added to speed up context switching in a kernel application. */
4087 if (optimize > 0
4088 && (TREE_NOTHROW (current_function_decl)
4089 || !(flag_unwind_tables
4090 || (flag_exceptions
4091 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4092 && TREE_THIS_VOLATILE (current_function_decl))
4093 type |= ARM_FT_VOLATILE;
4094
4095 if (cfun->static_chain_decl != NULL)
4096 type |= ARM_FT_NESTED;
4097
4098 attr = DECL_ATTRIBUTES (current_function_decl);
4099
4100 a = lookup_attribute ("naked", attr);
4101 if (a != NULL_TREE)
4102 type |= ARM_FT_NAKED;
4103
4104 a = lookup_attribute ("isr", attr);
4105 if (a == NULL_TREE)
4106 a = lookup_attribute ("interrupt", attr);
4107
4108 if (a == NULL_TREE)
4109 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4110 else
4111 type |= arm_isr_value (TREE_VALUE (a));
4112
4113 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4114 type |= ARM_FT_CMSE_ENTRY;
4115
4116 return type;
4117 }
4118
4119 /* Returns the type of the current function. */
4120
4121 unsigned long
4122 arm_current_func_type (void)
4123 {
4124 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4125 cfun->machine->func_type = arm_compute_func_type ();
4126
4127 return cfun->machine->func_type;
4128 }
4129
4130 bool
4131 arm_allocate_stack_slots_for_args (void)
4132 {
4133 /* Naked functions should not allocate stack slots for arguments. */
4134 return !IS_NAKED (arm_current_func_type ());
4135 }
4136
4137 static bool
4138 arm_warn_func_return (tree decl)
4139 {
4140 /* Naked functions are implemented entirely in assembly, including the
4141 return sequence, so suppress warnings about this. */
4142 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4143 }
4144
4145 \f
4146 /* Output assembler code for a block containing the constant parts
4147 of a trampoline, leaving space for the variable parts.
4148
4149 On the ARM, (if r8 is the static chain regnum, and remembering that
4150 referencing pc adds an offset of 8) the trampoline looks like:
4151 ldr r8, [pc, #0]
4152 ldr pc, [pc]
4153 .word static chain value
4154 .word function's address
4155 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4156
4157 In FDPIC mode, the trampoline looks like:
4158 .word trampoline address
4159 .word trampoline GOT address
4160 ldr r12, [pc, #8] ; #4 for Arm mode
4161 ldr r9, [pc, #8] ; #4 for Arm mode
4162 ldr pc, [pc, #8] ; #4 for Arm mode
4163 .word static chain value
4164 .word GOT address
4165 .word function's address
4166 */
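/* Editorial worked example for the standard 32-bit Arm layout above
   (derived from arm_trampoline_init below, not part of the original
   sources): the "ldr r8, [pc, #0]" at offset 0 reads pc as 0 + 8 and so
   loads the static chain word at offset 8, while the "ldr pc, [pc]" at
   offset 4 reads pc as 4 + 8 and loads the function address at offset 12;
   arm_trampoline_init stores the chain value and the function address at
   exactly those offsets (12 and 16 for the longer Thumb-1 stub).  */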
4167
4168 static void
4169 arm_asm_trampoline_template (FILE *f)
4170 {
4171 fprintf (f, "\t.syntax unified\n");
4172
4173 if (TARGET_FDPIC)
4174 {
4175 /* The first two words are a function descriptor pointing to the
4176 trampoline code just below. */
4177 if (TARGET_ARM)
4178 fprintf (f, "\t.arm\n");
4179 else if (TARGET_THUMB2)
4180 fprintf (f, "\t.thumb\n");
4181 else
4182 /* Only ARM and Thumb-2 are supported. */
4183 gcc_unreachable ();
4184
4185 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4186 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4187 /* Trampoline code which sets the static chain register but also
4188 PIC register before jumping into real code. */
4189 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4190 STATIC_CHAIN_REGNUM, PC_REGNUM,
4191 TARGET_THUMB2 ? 8 : 4);
4192 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4193 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4194 TARGET_THUMB2 ? 8 : 4);
4195 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4196 PC_REGNUM, PC_REGNUM,
4197 TARGET_THUMB2 ? 8 : 4);
4198 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4199 }
4200 else if (TARGET_ARM)
4201 {
4202 fprintf (f, "\t.arm\n");
4203 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4204 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4205 }
4206 else if (TARGET_THUMB2)
4207 {
4208 fprintf (f, "\t.thumb\n");
4209 /* The Thumb-2 trampoline is similar to the arm implementation.
4210 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4211 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4212 STATIC_CHAIN_REGNUM, PC_REGNUM);
4213 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4214 }
4215 else
4216 {
4217 ASM_OUTPUT_ALIGN (f, 2);
4218 fprintf (f, "\t.code\t16\n");
4219 fprintf (f, ".Ltrampoline_start:\n");
4220 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4221 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4222 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4223 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4224 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4225 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4226 }
4227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4228 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4229 }
4230
4231 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4232
4233 static void
4234 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4235 {
4236 rtx fnaddr, mem, a_tramp;
4237
4238 emit_block_move (m_tramp, assemble_trampoline_template (),
4239 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4240
4241 if (TARGET_FDPIC)
4242 {
4243 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4244 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4245 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4246 /* The function start address is at offset 8, but in Thumb mode
4247 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4248 below. */
4249 rtx trampoline_code_start
4250 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4251
4252 /* Write initial funcdesc which points to the trampoline. */
4253 mem = adjust_address (m_tramp, SImode, 0);
4254 emit_move_insn (mem, trampoline_code_start);
4255 mem = adjust_address (m_tramp, SImode, 4);
4256 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4257 /* Setup static chain. */
4258 mem = adjust_address (m_tramp, SImode, 20);
4259 emit_move_insn (mem, chain_value);
4260 /* GOT + real function entry point. */
4261 mem = adjust_address (m_tramp, SImode, 24);
4262 emit_move_insn (mem, gotaddr);
4263 mem = adjust_address (m_tramp, SImode, 28);
4264 emit_move_insn (mem, fnaddr);
4265 }
4266 else
4267 {
4268 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4269 emit_move_insn (mem, chain_value);
4270
4271 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4272 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4273 emit_move_insn (mem, fnaddr);
4274 }
4275
4276 a_tramp = XEXP (m_tramp, 0);
4277 maybe_emit_call_builtin___clear_cache (a_tramp,
4278 plus_constant (ptr_mode,
4279 a_tramp,
4280 TRAMPOLINE_SIZE));
4281 }
4282
4283 /* Thumb trampolines should be entered in thumb mode, so set
4284 the bottom bit of the address. */
4285
4286 static rtx
4287 arm_trampoline_adjust_address (rtx addr)
4288 {
4289 /* For FDPIC don't fix trampoline address since it's a function
4290 descriptor and not a function address. */
4291 if (TARGET_THUMB && !TARGET_FDPIC)
4292 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4293 NULL, 0, OPTAB_LIB_WIDEN);
4294 return addr;
4295 }
4296 \f
4297 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4298 includes call-clobbered registers too. If this is a leaf function
4299 we can just examine the registers used by the RTL, but otherwise we
4300 have to assume that whatever function is called might clobber
4301 anything, and so we have to save all the call-clobbered registers
4302 as well. */
4303 static inline bool reg_needs_saving_p (unsigned reg)
4304 {
4305 unsigned long func_type = arm_current_func_type ();
4306
4307 if (IS_INTERRUPT (func_type))
4308 if (df_regs_ever_live_p (reg)
4309 /* Save call-clobbered core registers. */
4310 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4311 return true;
4312 else
4313 return false;
4314 else
4315 if (!df_regs_ever_live_p (reg)
4316 || call_used_or_fixed_reg_p (reg))
4317 return false;
4318 else
4319 return true;
4320 }
4321
4322 /* Return 1 if it is possible to return using a single instruction.
4323 If SIBLING is non-null, this is a test for a return before a sibling
4324 call. SIBLING is the call insn, so we can examine its register usage. */
4325
4326 int
4327 use_return_insn (int iscond, rtx sibling)
4328 {
4329 int regno;
4330 unsigned int func_type;
4331 unsigned long saved_int_regs;
4332 unsigned HOST_WIDE_INT stack_adjust;
4333 arm_stack_offsets *offsets;
4334
4335 /* Never use a return instruction before reload has run. */
4336 if (!reload_completed)
4337 return 0;
4338
4339 func_type = arm_current_func_type ();
4340
4341 /* Naked, volatile and stack alignment functions need special
4342 consideration. */
4343 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4344 return 0;
4345
4346 /* So do interrupt functions that use the frame pointer and Thumb
4347 interrupt functions. */
4348 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4349 return 0;
4350
4351 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4352 && !optimize_function_for_size_p (cfun))
4353 return 0;
4354
4355 offsets = arm_get_frame_offsets ();
4356 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4357
4358 /* As do variadic functions. */
4359 if (crtl->args.pretend_args_size
4360 || cfun->machine->uses_anonymous_args
4361 /* Or if the function calls __builtin_eh_return () */
4362 || crtl->calls_eh_return
4363 /* Or if the function calls alloca */
4364 || cfun->calls_alloca
4365 /* Or if there is a stack adjustment. However, if the stack pointer
4366 is saved on the stack, we can use a pre-incrementing stack load. */
4367 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4368 && stack_adjust == 4))
4369 /* Or if the static chain register was saved above the frame, under the
4370 assumption that the stack pointer isn't saved on the stack. */
4371 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4372 && arm_compute_static_chain_stack_bytes() != 0))
4373 return 0;
4374
4375 saved_int_regs = offsets->saved_regs_mask;
4376
4377 /* Unfortunately, the insn
4378
4379 ldmib sp, {..., sp, ...}
4380
4381 triggers a bug on most SA-110 based devices, such that the stack
4382 pointer won't be correctly restored if the instruction takes a
4383 page fault. We work around this problem by popping r3 along with
4384 the other registers, since that is never slower than executing
4385 another instruction.
4386
4387 We test for !arm_arch5t here, because code for any architecture
4388 less than this could potentially be run on one of the buggy
4389 chips. */
4390 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4391 {
4392 /* Validate that r3 is a call-clobbered register (always true in
4393 the default abi) ... */
4394 if (!call_used_or_fixed_reg_p (3))
4395 return 0;
4396
4397 /* ... that it isn't being used for a return value ... */
4398 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4399 return 0;
4400
4401 /* ... or for a tail-call argument ... */
4402 if (sibling)
4403 {
4404 gcc_assert (CALL_P (sibling));
4405
4406 if (find_regno_fusage (sibling, USE, 3))
4407 return 0;
4408 }
4409
4410 /* ... and that there are no call-saved registers in r0-r2
4411 (always true in the default ABI). */
4412 if (saved_int_regs & 0x7)
4413 return 0;
4414 }
4415
4416 /* Can't be done if interworking with Thumb, and any registers have been
4417 stacked. */
4418 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4419 return 0;
4420
4421 /* On StrongARM, conditional returns are expensive if they aren't
4422 taken and multiple registers have been stacked. */
4423 if (iscond && arm_tune_strongarm)
4424 {
4425 /* Conditional return when just the LR is stored is a simple
4426 conditional-load instruction, that's not expensive. */
4427 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4428 return 0;
4429
4430 if (flag_pic
4431 && arm_pic_register != INVALID_REGNUM
4432 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4433 return 0;
4434 }
4435
4436   /* ARMv8-M non-secure entry functions need to use BXNS to return, and thus
4437      need several instructions if anything must be popped.  Armv8.1-M Mainline
4438      also needs several instructions to save and restore the FP context.  */
4439 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4440 return 0;
4441
4442 /* If there are saved registers but the LR isn't saved, then we need
4443 two instructions for the return. */
4444 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4445 return 0;
4446
4447 /* Can't be done if any of the VFP regs are pushed,
4448 since this also requires an insn. */
4449 if (TARGET_VFP_BASE)
4450 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4451 if (reg_needs_saving_p (regno))
4452 return 0;
4453
4454 if (TARGET_REALLY_IWMMXT)
4455 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4456 if (reg_needs_saving_p (regno))
4457 return 0;
4458
4459 return 1;
4460 }
4461
4462 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4463 shrink-wrapping if possible. This is the case if we need to emit a
4464 prologue, which we can test by looking at the offsets. */
4465 bool
4466 use_simple_return_p (void)
4467 {
4468 arm_stack_offsets *offsets;
4469
4470 /* Note this function can be called before or after reload. */
4471 if (!reload_completed)
4472 arm_compute_frame_layout ();
4473
4474 offsets = arm_get_frame_offsets ();
4475 return offsets->outgoing_args != 0;
4476 }
4477
4478 /* Return TRUE if int I is a valid immediate ARM constant. */
4479
4480 int
4481 const_ok_for_arm (HOST_WIDE_INT i)
4482 {
4483 int lowbit;
4484
4485 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4486 be all zero, or all one. */
4487 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4488 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4489 != ((~(unsigned HOST_WIDE_INT) 0)
4490 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4491 return FALSE;
4492
4493 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4494
4495 /* Fast return for 0 and small values. We must do this for zero, since
4496 the code below can't handle that one case. */
4497 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4498 return TRUE;
4499
4500 /* Get the number of trailing zeros. */
4501 lowbit = ffs((int) i) - 1;
4502
4503 /* Only even shifts are allowed in ARM mode so round down to the
4504 nearest even number. */
4505 if (TARGET_ARM)
4506 lowbit &= ~1;
4507
4508 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4509 return TRUE;
4510
4511 if (TARGET_ARM)
4512 {
4513 /* Allow rotated constants in ARM mode. */
4514 if (lowbit <= 4
4515 && ((i & ~0xc000003f) == 0
4516 || (i & ~0xf000000f) == 0
4517 || (i & ~0xfc000003) == 0))
4518 return TRUE;
4519 }
4520 else if (TARGET_THUMB2)
4521 {
4522 HOST_WIDE_INT v;
4523
4524 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4525 v = i & 0xff;
4526 v |= v << 16;
4527 if (i == v || i == (v | (v << 8)))
4528 return TRUE;
4529
4530 /* Allow repeated pattern 0xXY00XY00. */
4531 v = i & 0xff00;
4532 v |= v << 16;
4533 if (i == v)
4534 return TRUE;
4535 }
4536 else if (TARGET_HAVE_MOVT)
4537 {
4538 /* Thumb-1 Targets with MOVT. */
4539 if (i > 0xffff)
4540 return FALSE;
4541 else
4542 return TRUE;
4543 }
4544
4545 return FALSE;
4546 }
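/* Editorial worked examples for the checks above (not part of the original
   sources):
     0x000000ff   valid everywhere: a plain 8-bit immediate.
     0xff000000   valid: 0xff rotated to an even bit position.
     0x00ffff00   invalid: sixteen contiguous set bits cannot fit in one
                  rotated 8-bit field, so the value must be synthesized
                  from two immediates.
     0x01010101   invalid in ARM state, but valid in Thumb-2 as the
                  replicated byte pattern 0xXYXYXYXY.  */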
4547
4548 /* Return true if I is a valid constant for the operation CODE. */
4549 int
4550 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4551 {
4552 if (const_ok_for_arm (i))
4553 return 1;
4554
4555 switch (code)
4556 {
4557 case SET:
4558 /* See if we can use movw. */
4559 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4560 return 1;
4561 else
4562 /* Otherwise, try mvn. */
4563 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4564
4565 case PLUS:
4566 /* See if we can use addw or subw. */
4567 if (TARGET_THUMB2
4568 && ((i & 0xfffff000) == 0
4569 || ((-i) & 0xfffff000) == 0))
4570 return 1;
4571 /* Fall through. */
4572 case COMPARE:
4573 case EQ:
4574 case NE:
4575 case GT:
4576 case LE:
4577 case LT:
4578 case GE:
4579 case GEU:
4580 case LTU:
4581 case GTU:
4582 case LEU:
4583 case UNORDERED:
4584 case ORDERED:
4585 case UNEQ:
4586 case UNGE:
4587 case UNLT:
4588 case UNGT:
4589 case UNLE:
4590 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4591
4592 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4593 case XOR:
4594 return 0;
4595
4596 case IOR:
4597 if (TARGET_THUMB2)
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4599 return 0;
4600
4601 case AND:
4602 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4603
4604 default:
4605 gcc_unreachable ();
4606 }
4607 }
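/* Editorial example (not part of the original sources): 0xffffff00 is not
   a valid immediate by itself, but for CODE == AND it is accepted above
   because its complement, 0xff, is valid; the operation can then be emitted
   as a single BIC with #0xff rather than synthesizing the mask.  Similarly,
   a SET of 0x00001234 is accepted when MOVW is available because the value
   fits in the low 16 bits.  */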
4608
4609 /* Return true if I is a valid di mode constant for the operation CODE. */
4610 int
4611 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4612 {
4613 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4614 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4615 rtx hi = GEN_INT (hi_val);
4616 rtx lo = GEN_INT (lo_val);
4617
4618 if (TARGET_THUMB1)
4619 return 0;
4620
4621 switch (code)
4622 {
4623 case AND:
4624 case IOR:
4625 case XOR:
4626 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4627 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4628 case PLUS:
4629 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4630
4631 default:
4632 return 0;
4633 }
4634 }
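/* Editorial example (not part of the original sources): a DImode AND with
   0xffffffff000000ff splits into an all-ones high word, which leaves that
   half unchanged and so needs no instruction, and a low word of 0xff, which
   is itself a valid immediate; the test above therefore accepts the
   constant.  */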
4635
4636 /* Emit a sequence of insns to handle a large constant.
4637 CODE is the code of the operation required, it can be any of SET, PLUS,
4638 IOR, AND, XOR, MINUS;
4639 MODE is the mode in which the operation is being performed;
4640 VAL is the integer to operate on;
4641 SOURCE is the other operand (a register, or a null-pointer for SET);
4642 SUBTARGETS means it is safe to create scratch registers if that will
4643 either produce a simpler sequence, or we will want to cse the values.
4644 Return value is the number of insns emitted. */
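/* Editorial example (not part of the original sources): on a target with
   MOVW/MOVT, a SET of 0x12345678 would typically be emitted by the code
   below as a MOVW of 0x5678 followed by a MOVT of 0x1234 (see
   arm_emit_movpair), rather than as a load from the literal pool.  */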
4645
4646 /* ??? Tweak this for thumb2. */
4647 int
4648 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4649 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4650 {
4651 rtx cond;
4652
4653 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4654 cond = COND_EXEC_TEST (PATTERN (insn));
4655 else
4656 cond = NULL_RTX;
4657
4658 if (subtargets || code == SET
4659 || (REG_P (target) && REG_P (source)
4660 && REGNO (target) != REGNO (source)))
4661 {
4662 /* After arm_reorg has been called, we can't fix up expensive
4663 constants by pushing them into memory so we must synthesize
4664 them in-line, regardless of the cost. This is only likely to
4665 be more costly on chips that have load delay slots and we are
4666 compiling without running the scheduler (so no splitting
4667 occurred before the final instruction emission).
4668
4669 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4670 */
4671 if (!cfun->machine->after_arm_reorg
4672 && !cond
4673 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4674 1, 0)
4675 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4676 + (code != SET))))
4677 {
4678 if (code == SET)
4679 {
4680 		  /* Currently SET is the only monadic value for CODE; all
4681 		     the rest are dyadic.  */
4682 if (TARGET_USE_MOVT)
4683 arm_emit_movpair (target, GEN_INT (val));
4684 else
4685 emit_set_insn (target, GEN_INT (val));
4686
4687 return 1;
4688 }
4689 else
4690 {
4691 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4692
4693 if (TARGET_USE_MOVT)
4694 arm_emit_movpair (temp, GEN_INT (val));
4695 else
4696 emit_set_insn (temp, GEN_INT (val));
4697
4698 /* For MINUS, the value is subtracted from, since we never
4699 have subtraction of a constant. */
4700 if (code == MINUS)
4701 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4702 else
4703 emit_set_insn (target,
4704 gen_rtx_fmt_ee (code, mode, source, temp));
4705 return 2;
4706 }
4707 }
4708 }
4709
4710 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4711 1);
4712 }
4713
4714 /* Return, in RETURN_SEQUENCE, a sequence of integers that each fit into an
4715    ARM/Thumb-2 immediate and that add up to VAL.
4716    The function's return value gives the number of insns required.  */
4717 static int
4718 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4719 struct four_ints *return_sequence)
4720 {
4721 int best_consecutive_zeros = 0;
4722 int i;
4723 int best_start = 0;
4724 int insns1, insns2;
4725 struct four_ints tmp_sequence;
4726
4727 /* If we aren't targeting ARM, the best place to start is always at
4728 the bottom, otherwise look more closely. */
4729 if (TARGET_ARM)
4730 {
4731 for (i = 0; i < 32; i += 2)
4732 {
4733 int consecutive_zeros = 0;
4734
4735 if (!(val & (3 << i)))
4736 {
4737 while ((i < 32) && !(val & (3 << i)))
4738 {
4739 consecutive_zeros += 2;
4740 i += 2;
4741 }
4742 if (consecutive_zeros > best_consecutive_zeros)
4743 {
4744 best_consecutive_zeros = consecutive_zeros;
4745 best_start = i - consecutive_zeros;
4746 }
4747 i -= 2;
4748 }
4749 }
4750 }
4751
4752 /* So long as it won't require any more insns to do so, it's
4753 desirable to emit a small constant (in bits 0...9) in the last
4754 insn. This way there is more chance that it can be combined with
4755 a later addressing insn to form a pre-indexed load or store
4756 operation. Consider:
4757
4758 *((volatile int *)0xe0000100) = 1;
4759 *((volatile int *)0xe0000110) = 2;
4760
4761 We want this to wind up as:
4762
4763 mov rA, #0xe0000000
4764 mov rB, #1
4765 str rB, [rA, #0x100]
4766 mov rB, #2
4767 str rB, [rA, #0x110]
4768
4769 rather than having to synthesize both large constants from scratch.
4770
4771 Therefore, we calculate how many insns would be required to emit
4772 the constant starting from `best_start', and also starting from
4773 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4774 yield a shorter sequence, we may as well use zero. */
4775 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4776 if (best_start != 0
4777 && ((HOST_WIDE_INT_1U << best_start) < val))
4778 {
4779 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4780 if (insns2 <= insns1)
4781 {
4782 *return_sequence = tmp_sequence;
4783 insns1 = insns2;
4784 }
4785 }
4786
4787 return insns1;
4788 }
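/* Editorial worked example (not part of the original sources): for
   VAL == 0x00ffff00 in ARM state, no single rotated 8-bit immediate can
   cover the sixteen set bits, so the routine above returns the two-element
   sequence 0x00ff0000, 0x0000ff00; a SET of that value is then typically
   synthesized as a MOV of the first chunk followed by an ORR of the
   second.  */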
4789
4790 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4791 static int
4792 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4793 struct four_ints *return_sequence, int i)
4794 {
4795 int remainder = val & 0xffffffff;
4796 int insns = 0;
4797
4798 /* Try and find a way of doing the job in either two or three
4799 instructions.
4800
4801 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4802 location. We start at position I. This may be the MSB, or
4803      optimal_immediate_sequence may have positioned it at the largest block
4804 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4805 wrapping around to the top of the word when we drop off the bottom.
4806 In the worst case this code should produce no more than four insns.
4807
4808 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4809 constants, shifted to any arbitrary location. We should always start
4810 at the MSB. */
4811 do
4812 {
4813 int end;
4814 unsigned int b1, b2, b3, b4;
4815 unsigned HOST_WIDE_INT result;
4816 int loc;
4817
4818 gcc_assert (insns < 4);
4819
4820 if (i <= 0)
4821 i += 32;
4822
4823 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4824 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4825 {
4826 loc = i;
4827 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4828 /* We can use addw/subw for the last 12 bits. */
4829 result = remainder;
4830 else
4831 {
4832 /* Use an 8-bit shifted/rotated immediate. */
4833 end = i - 8;
4834 if (end < 0)
4835 end += 32;
4836 result = remainder & ((0x0ff << end)
4837 | ((i < end) ? (0xff >> (32 - end))
4838 : 0));
4839 i -= 8;
4840 }
4841 }
4842 else
4843 {
4844 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4845 arbitrary shifts. */
4846 i -= TARGET_ARM ? 2 : 1;
4847 continue;
4848 }
4849
4850 /* Next, see if we can do a better job with a thumb2 replicated
4851 constant.
4852
4853 We do it this way around to catch the cases like 0x01F001E0 where
4854 two 8-bit immediates would work, but a replicated constant would
4855 make it worse.
4856
4857 TODO: 16-bit constants that don't clear all the bits, but still win.
4858 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4859 if (TARGET_THUMB2)
4860 {
4861 b1 = (remainder & 0xff000000) >> 24;
4862 b2 = (remainder & 0x00ff0000) >> 16;
4863 b3 = (remainder & 0x0000ff00) >> 8;
4864 b4 = remainder & 0xff;
4865
4866 if (loc > 24)
4867 {
4868 /* The 8-bit immediate already found clears b1 (and maybe b2),
4869 but must leave b3 and b4 alone. */
4870
4871 /* First try to find a 32-bit replicated constant that clears
4872 almost everything. We can assume that we can't do it in one,
4873 or else we wouldn't be here. */
4874 unsigned int tmp = b1 & b2 & b3 & b4;
4875 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4876 + (tmp << 24);
4877 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4878 + (tmp == b3) + (tmp == b4);
4879 if (tmp
4880 && (matching_bytes >= 3
4881 || (matching_bytes == 2
4882 && const_ok_for_op (remainder & ~tmp2, code))))
4883 {
4884 /* At least 3 of the bytes match, and the fourth has at
4885 least as many bits set, or two of the bytes match
4886 and it will only require one more insn to finish. */
4887 result = tmp2;
4888 i = tmp != b1 ? 32
4889 : tmp != b2 ? 24
4890 : tmp != b3 ? 16
4891 : 8;
4892 }
4893
4894 /* Second, try to find a 16-bit replicated constant that can
4895 leave three of the bytes clear. If b2 or b4 is already
4896 zero, then we can. If the 8-bit from above would not
4897 clear b2 anyway, then we still win. */
4898 else if (b1 == b3 && (!b2 || !b4
4899 || (remainder & 0x00ff0000 & ~result)))
4900 {
4901 result = remainder & 0xff00ff00;
4902 i = 24;
4903 }
4904 }
4905 else if (loc > 16)
4906 {
4907 /* The 8-bit immediate already found clears b2 (and maybe b3)
4908              and we don't get here unless b1 is already clear, but it will
4909 leave b4 unchanged. */
4910
4911 /* If we can clear b2 and b4 at once, then we win, since the
4912 8-bits couldn't possibly reach that far. */
4913 if (b2 == b4)
4914 {
4915 result = remainder & 0x00ff00ff;
4916 i = 16;
4917 }
4918 }
4919 }
4920
4921 return_sequence->i[insns++] = result;
4922 remainder &= ~result;
4923
4924 if (code == SET || code == MINUS)
4925 code = PLUS;
4926 }
4927 while (remainder);
4928
4929 return insns;
4930 }
4931
4932 /* Emit an instruction with the indicated PATTERN. If COND is
4933 non-NULL, conditionalize the execution of the instruction on COND
4934 being true. */
4935
4936 static void
4937 emit_constant_insn (rtx cond, rtx pattern)
4938 {
4939 if (cond)
4940 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4941 emit_insn (pattern);
4942 }
4943
4944 /* As above, but extra parameter GENERATE which, if clear, suppresses
4945 RTL generation. */
4946
4947 static int
4948 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4949 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4950 int subtargets, int generate)
4951 {
4952 int can_invert = 0;
4953 int can_negate = 0;
4954 int final_invert = 0;
4955 int i;
4956 int set_sign_bit_copies = 0;
4957 int clear_sign_bit_copies = 0;
4958 int clear_zero_bit_copies = 0;
4959 int set_zero_bit_copies = 0;
4960 int insns = 0, neg_insns, inv_insns;
4961 unsigned HOST_WIDE_INT temp1, temp2;
4962 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4963 struct four_ints *immediates;
4964 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4965
4966 /* Find out which operations are safe for a given CODE. Also do a quick
4967 check for degenerate cases; these can occur when DImode operations
4968 are split. */
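  /* E.g. a SET may equally well be built from the bitwise inverse of VAL
     (starting with an MVN of an inverted immediate), a PLUS can instead
     add the negated value, and AND is handled further down by inverting
     the mask and using BIC.  */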
4969 switch (code)
4970 {
4971 case SET:
4972 can_invert = 1;
4973 break;
4974
4975 case PLUS:
4976 can_negate = 1;
4977 break;
4978
4979 case IOR:
4980 if (remainder == 0xffffffff)
4981 {
4982 if (generate)
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (target,
4985 GEN_INT (ARM_SIGN_EXTEND (val))));
4986 return 1;
4987 }
4988
4989 if (remainder == 0)
4990 {
4991 if (reload_completed && rtx_equal_p (target, source))
4992 return 0;
4993
4994 if (generate)
4995 emit_constant_insn (cond, gen_rtx_SET (target, source));
4996 return 1;
4997 }
4998 break;
4999
5000 case AND:
5001 if (remainder == 0)
5002 {
5003 if (generate)
5004 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5005 return 1;
5006 }
5007 if (remainder == 0xffffffff)
5008 {
5009 if (reload_completed && rtx_equal_p (target, source))
5010 return 0;
5011 if (generate)
5012 emit_constant_insn (cond, gen_rtx_SET (target, source));
5013 return 1;
5014 }
5015 can_invert = 1;
5016 break;
5017
5018 case XOR:
5019 if (remainder == 0)
5020 {
5021 if (reload_completed && rtx_equal_p (target, source))
5022 return 0;
5023 if (generate)
5024 emit_constant_insn (cond, gen_rtx_SET (target, source));
5025 return 1;
5026 }
5027
5028 if (remainder == 0xffffffff)
5029 {
5030 if (generate)
5031 emit_constant_insn (cond,
5032 gen_rtx_SET (target,
5033 gen_rtx_NOT (mode, source)));
5034 return 1;
5035 }
5036 final_invert = 1;
5037 break;
5038
5039 case MINUS:
5040 /* We treat MINUS as (val - source), since (source - val) is always
5041 passed as (source + (-val)). */
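      /* E.g. 10 - x reaches here as MINUS with VAL == 10, whereas
	 x - 10 is instead passed in as PLUS with VAL == -10.  */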
5042 if (remainder == 0)
5043 {
5044 if (generate)
5045 emit_constant_insn (cond,
5046 gen_rtx_SET (target,
5047 gen_rtx_NEG (mode, source)));
5048 return 1;
5049 }
5050 if (const_ok_for_arm (val))
5051 {
5052 if (generate)
5053 emit_constant_insn (cond,
5054 gen_rtx_SET (target,
5055 gen_rtx_MINUS (mode, GEN_INT (val),
5056 source)));
5057 return 1;
5058 }
5059
5060 break;
5061
5062 default:
5063 gcc_unreachable ();
5064 }
5065
5066 /* If we can do it in one insn get out quickly. */
5067 if (const_ok_for_op (val, code))
5068 {
5069 if (generate)
5070 emit_constant_insn (cond,
5071 gen_rtx_SET (target,
5072 (source
5073 ? gen_rtx_fmt_ee (code, mode, source,
5074 GEN_INT (val))
5075 : GEN_INT (val))));
5076 return 1;
5077 }
5078
5079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5080 insn. */
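  /* E.g. on ARMv6 and later, x & 0xffff becomes a single UXTH, and with
     Thumb-2, x & 0x1fffff becomes a single UBFX of the low 21 bits.  */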
5081 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5082 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5083 {
5084 if (generate)
5085 {
5086 if (mode == SImode && i == 16)
5087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5088 smaller insn. */
5089 emit_constant_insn (cond,
5090 gen_zero_extendhisi2
5091 (target, gen_lowpart (HImode, source)));
5092 else
5093 /* Extz only supports SImode, but we can coerce the operands
5094 into that mode. */
5095 emit_constant_insn (cond,
5096 gen_extzv_t2 (gen_lowpart (SImode, target),
5097 gen_lowpart (SImode, source),
5098 GEN_INT (i), const0_rtx));
5099 }
5100
5101 return 1;
5102 }
5103
5104 /* Calculate a few attributes that may be useful for specific
5105 optimizations. */
5106 /* Count number of leading zeros. */
5107 for (i = 31; i >= 0; i--)
5108 {
5109 if ((remainder & (1 << i)) == 0)
5110 clear_sign_bit_copies++;
5111 else
5112 break;
5113 }
5114
5115 /* Count number of leading 1's. */
5116 for (i = 31; i >= 0; i--)
5117 {
5118 if ((remainder & (1 << i)) != 0)
5119 set_sign_bit_copies++;
5120 else
5121 break;
5122 }
5123
5124 /* Count number of trailing zero's. */
5125 for (i = 0; i <= 31; i++)
5126 {
5127 if ((remainder & (1 << i)) == 0)
5128 clear_zero_bit_copies++;
5129 else
5130 break;
5131 }
5132
5133 /* Count number of trailing 1's. */
5134 for (i = 0; i <= 31; i++)
5135 {
5136 if ((remainder & (1 << i)) != 0)
5137 set_zero_bit_copies++;
5138 else
5139 break;
5140 }
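  /* E.g. remainder == 0x0003fc00 gives 14 leading zeros, 10 trailing
     zeros and no leading or trailing ones.  */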
5141
5142 switch (code)
5143 {
5144 case SET:
5145 /* See if we can do this by sign_extending a constant that is known
5146          to be negative.  This is a good way of doing it, since the shift
5147 may well merge into a subsequent insn. */
5148 if (set_sign_bit_copies > 1)
5149 {
5150 if (const_ok_for_arm
5151 (temp1 = ARM_SIGN_EXTEND (remainder
5152 << (set_sign_bit_copies - 1))))
5153 {
5154 if (generate)
5155 {
5156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5157 emit_constant_insn (cond,
5158 gen_rtx_SET (new_src, GEN_INT (temp1)));
5159 emit_constant_insn (cond,
5160 gen_ashrsi3 (target, new_src,
5161 GEN_INT (set_sign_bit_copies - 1)));
5162 }
5163 return 2;
5164 }
5165 /* For an inverted constant, we will need to set the low bits,
5166 these will be shifted out of harm's way. */
5167 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5168 if (const_ok_for_arm (~temp1))
5169 {
5170 if (generate)
5171 {
5172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5173 emit_constant_insn (cond,
5174 gen_rtx_SET (new_src, GEN_INT (temp1)));
5175 emit_constant_insn (cond,
5176 gen_ashrsi3 (target, new_src,
5177 GEN_INT (set_sign_bit_copies - 1)));
5178 }
5179 return 2;
5180 }
5181 }
5182
5183 /* See if we can calculate the value as the difference between two
5184 valid immediates. */
5185 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5186 {
5187 int topshift = clear_sign_bit_copies & ~1;
5188
5189 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5190 & (0xff000000 >> topshift));
5191
5192 /* If temp1 is zero, then that means the 9 most significant
5193 bits of remainder were 1 and we've caused it to overflow.
5194 When topshift is 0 we don't need to do anything since we
5195 can borrow from 'bit 32'. */
5196 if (temp1 == 0 && topshift != 0)
5197 temp1 = 0x80000000 >> (topshift - 1);
5198
5199 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5200
5201 if (const_ok_for_arm (temp2))
5202 {
5203 if (generate)
5204 {
5205 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5206 emit_constant_insn (cond,
5207 gen_rtx_SET (new_src, GEN_INT (temp1)));
5208 emit_constant_insn (cond,
5209 gen_addsi3 (target, new_src,
5210 GEN_INT (-temp2)));
5211 }
5212
5213 return 2;
5214 }
5215 }
5216
5217 /* See if we can generate this by setting the bottom (or the top)
5218 16 bits, and then shifting these into the other half of the
5219 word. We only look for the simplest cases, to do more would cost
5220 too much. Be careful, however, not to generate this when the
5221 alternative would take fewer insns. */
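      /* E.g. 0x01230123 is built by first synthesizing 0x0123 and then
	 ORRing it with itself shifted left by 16, which is normally
	 cheaper than splitting the full value into rotated 8-bit
	 immediates.  */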
5222 if (val & 0xffff0000)
5223 {
5224 temp1 = remainder & 0xffff0000;
5225 temp2 = remainder & 0x0000ffff;
5226
5227 /* Overlaps outside this range are best done using other methods. */
5228 for (i = 9; i < 24; i++)
5229 {
5230 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5231 && !const_ok_for_arm (temp2))
5232 {
5233 rtx new_src = (subtargets
5234 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5235 : target);
5236 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5237 source, subtargets, generate);
5238 source = new_src;
5239 if (generate)
5240 emit_constant_insn
5241 (cond,
5242 gen_rtx_SET
5243 (target,
5244 gen_rtx_IOR (mode,
5245 gen_rtx_ASHIFT (mode, source,
5246 GEN_INT (i)),
5247 source)));
5248 return insns + 1;
5249 }
5250 }
5251
5252 /* Don't duplicate cases already considered. */
5253 for (i = 17; i < 24; i++)
5254 {
5255 if (((temp1 | (temp1 >> i)) == remainder)
5256 && !const_ok_for_arm (temp1))
5257 {
5258 rtx new_src = (subtargets
5259 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5260 : target);
5261 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5262 source, subtargets, generate);
5263 source = new_src;
5264 if (generate)
5265 emit_constant_insn
5266 (cond,
5267 gen_rtx_SET (target,
5268 gen_rtx_IOR
5269 (mode,
5270 gen_rtx_LSHIFTRT (mode, source,
5271 GEN_INT (i)),
5272 source)));
5273 return insns + 1;
5274 }
5275 }
5276 }
5277 break;
5278
5279 case IOR:
5280 case XOR:
5281 /* If we have IOR or XOR, and the constant can be loaded in a
5282 single instruction, and we can find a temporary to put it in,
5283 then this can be done in two instructions instead of 3-4. */
5284 if (subtargets
5285          /* TARGET can't be NULL if SUBTARGETS is 0.  */
5286 || (reload_completed && !reg_mentioned_p (target, source)))
5287 {
5288 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5289 {
5290 if (generate)
5291 {
5292 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5293
5294 emit_constant_insn (cond,
5295 gen_rtx_SET (sub, GEN_INT (val)));
5296 emit_constant_insn (cond,
5297 gen_rtx_SET (target,
5298 gen_rtx_fmt_ee (code, mode,
5299 source, sub)));
5300 }
5301 return 2;
5302 }
5303 }
5304
5305 if (code == XOR)
5306 break;
5307
5308 /* Convert.
5309 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5310 and the remainder 0s for e.g. 0xfff00000)
5311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5312
5313 This can be done in 2 instructions by using shifts with mov or mvn.
5314 e.g. for
5315 x = x | 0xfff00000;
5316 we generate.
5317 mvn r0, r0, asl #12
5318 mvn r0, r0, lsr #12 */
5319 if (set_sign_bit_copies > 8
5320 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5321 {
5322 if (generate)
5323 {
5324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5325 rtx shift = GEN_INT (set_sign_bit_copies);
5326
5327 emit_constant_insn
5328 (cond,
5329 gen_rtx_SET (sub,
5330 gen_rtx_NOT (mode,
5331 gen_rtx_ASHIFT (mode,
5332 source,
5333 shift))));
5334 emit_constant_insn
5335 (cond,
5336 gen_rtx_SET (target,
5337 gen_rtx_NOT (mode,
5338 gen_rtx_LSHIFTRT (mode, sub,
5339 shift))));
5340 }
5341 return 2;
5342 }
5343
5344 /* Convert
5345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5346 to
5347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5348
5349          E.g. for r0 = r0 | 0xfff
5350 mvn r0, r0, lsr #12
5351 mvn r0, r0, asl #12
5352
5353 */
5354 if (set_zero_bit_copies > 8
5355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5356 {
5357 if (generate)
5358 {
5359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5360 rtx shift = GEN_INT (set_zero_bit_copies);
5361
5362 emit_constant_insn
5363 (cond,
5364 gen_rtx_SET (sub,
5365 gen_rtx_NOT (mode,
5366 gen_rtx_LSHIFTRT (mode,
5367 source,
5368 shift))));
5369 emit_constant_insn
5370 (cond,
5371 gen_rtx_SET (target,
5372 gen_rtx_NOT (mode,
5373 gen_rtx_ASHIFT (mode, sub,
5374 shift))));
5375 }
5376 return 2;
5377 }
5378
5379 /* This will never be reached for Thumb2 because orn is a valid
5380 instruction. This is for Thumb1 and the ARM 32 bit cases.
5381
5382 x = y | constant (such that ~constant is a valid constant)
5383 Transform this to
5384 x = ~(~y & ~constant).
5385 */
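      /* For instance, when no spare register is available,
	   r0 = r0 | 0xffffff01   (~0xffffff01 == 0xfe is a valid immediate)
	 becomes
	   mvn	r0, r0
	   and	r0, r0, #254
	   mvn	r0, r0  */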
5386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5387 {
5388 if (generate)
5389 {
5390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5391 emit_constant_insn (cond,
5392 gen_rtx_SET (sub,
5393 gen_rtx_NOT (mode, source)));
5394 source = sub;
5395 if (subtargets)
5396 sub = gen_reg_rtx (mode);
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (sub,
5399 gen_rtx_AND (mode, source,
5400 GEN_INT (temp1))));
5401 emit_constant_insn (cond,
5402 gen_rtx_SET (target,
5403 gen_rtx_NOT (mode, sub)));
5404 }
5405 return 3;
5406 }
5407 break;
5408
5409 case AND:
5410 /* See if two shifts will do 2 or more insn's worth of work. */
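      /* E.g. on cores without UBFX, x & 0x00007fff (17 leading zero bits)
	 is done with a left shift by 17 followed by a logical right shift
	 by 17.  */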
5411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5412 {
5413 HOST_WIDE_INT shift_mask = ((0xffffffff
5414 << (32 - clear_sign_bit_copies))
5415 & 0xffffffff);
5416
5417 if ((remainder | shift_mask) != 0xffffffff)
5418 {
5419 HOST_WIDE_INT new_val
5420 = ARM_SIGN_EXTEND (remainder | shift_mask);
5421
5422 if (generate)
5423 {
5424 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5425 insns = arm_gen_constant (AND, SImode, cond, new_val,
5426 new_src, source, subtargets, 1);
5427 source = new_src;
5428 }
5429 else
5430 {
5431 rtx targ = subtargets ? NULL_RTX : target;
5432 insns = arm_gen_constant (AND, mode, cond, new_val,
5433 targ, source, subtargets, 0);
5434 }
5435 }
5436
5437 if (generate)
5438 {
5439 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5440 rtx shift = GEN_INT (clear_sign_bit_copies);
5441
5442 emit_insn (gen_ashlsi3 (new_src, source, shift));
5443 emit_insn (gen_lshrsi3 (target, new_src, shift));
5444 }
5445
5446 return insns + 2;
5447 }
5448
5449 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5450 {
5451 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5452
5453 if ((remainder | shift_mask) != 0xffffffff)
5454 {
5455 HOST_WIDE_INT new_val
5456 = ARM_SIGN_EXTEND (remainder | shift_mask);
5457 if (generate)
5458 {
5459 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5460
5461 insns = arm_gen_constant (AND, mode, cond, new_val,
5462 new_src, source, subtargets, 1);
5463 source = new_src;
5464 }
5465 else
5466 {
5467 rtx targ = subtargets ? NULL_RTX : target;
5468
5469 insns = arm_gen_constant (AND, mode, cond, new_val,
5470 targ, source, subtargets, 0);
5471 }
5472 }
5473
5474 if (generate)
5475 {
5476 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5477 rtx shift = GEN_INT (clear_zero_bit_copies);
5478
5479 emit_insn (gen_lshrsi3 (new_src, source, shift));
5480 emit_insn (gen_ashlsi3 (target, new_src, shift));
5481 }
5482
5483 return insns + 2;
5484 }
5485
5486 break;
5487
5488 default:
5489 break;
5490 }
5491
5492 /* Calculate what the instruction sequences would be if we generated it
5493 normally, negated, or inverted. */
5494 if (code == AND)
5495 /* AND cannot be split into multiple insns, so invert and use BIC. */
5496 insns = 99;
5497 else
5498 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5499
5500 if (can_negate)
5501 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5502 &neg_immediates);
5503 else
5504 neg_insns = 99;
5505
5506 if (can_invert || final_invert)
5507 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5508 &inv_immediates);
5509 else
5510 inv_insns = 99;
5511
5512 immediates = &pos_immediates;
5513
5514 /* Is the negated immediate sequence more efficient? */
5515 if (neg_insns < insns && neg_insns <= inv_insns)
5516 {
5517 insns = neg_insns;
5518 immediates = &neg_immediates;
5519 }
5520 else
5521 can_negate = 0;
5522
5523 /* Is the inverted immediate sequence more efficient?
5524 We must allow for an extra NOT instruction for XOR operations, although
5525 there is some chance that the final 'mvn' will get optimized later. */
5526 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5527 {
5528 insns = inv_insns;
5529 immediates = &inv_immediates;
5530 }
5531 else
5532 {
5533 can_invert = 0;
5534 final_invert = 0;
5535 }
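  /* E.g. for a SET of 0xfffff5f5 the positive form needs four immediates,
     but the inverse 0x00000a0a needs only two (0xa00 and 0xa), so the
     inverted sequence is chosen and comes out roughly as
	mvn	rX, #0xa00
	sub	rX, rX, #10  */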
5536
5537 /* Now output the chosen sequence as instructions. */
5538 if (generate)
5539 {
5540 for (i = 0; i < insns; i++)
5541 {
5542 rtx new_src, temp1_rtx;
5543
5544 temp1 = immediates->i[i];
5545
5546 if (code == SET || code == MINUS)
5547 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5548 else if ((final_invert || i < (insns - 1)) && subtargets)
5549 new_src = gen_reg_rtx (mode);
5550 else
5551 new_src = target;
5552
5553 if (can_invert)
5554 temp1 = ~temp1;
5555 else if (can_negate)
5556 temp1 = -temp1;
5557
5558 temp1 = trunc_int_for_mode (temp1, mode);
5559 temp1_rtx = GEN_INT (temp1);
5560
5561 if (code == SET)
5562 ;
5563 else if (code == MINUS)
5564 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5565 else
5566 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5567
5568 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5569 source = new_src;
5570
5571 if (code == SET)
5572 {
5573 can_negate = can_invert;
5574 can_invert = 0;
5575 code = PLUS;
5576 }
5577 else if (code == MINUS)
5578 code = PLUS;
5579 }
5580 }
5581
5582 if (final_invert)
5583 {
5584 if (generate)
5585 emit_constant_insn (cond, gen_rtx_SET (target,
5586 gen_rtx_NOT (mode, source)));
5587 insns++;
5588 }
5589
5590 return insns;
5591 }
5592
5593 /* Return TRUE if op is a constant where both the low and top words are
5594 suitable for RSB/RSC instructions. This is never true for Thumb, since
5595 we do not have RSC in that case. */
5596 static bool
5597 arm_const_double_prefer_rsbs_rsc (rtx op)
5598 {
5599 /* Thumb lacks RSC, so we never prefer that sequence. */
5600 if (TARGET_THUMB || !CONST_INT_P (op))
5601 return false;
5602 HOST_WIDE_INT hi, lo;
5603 lo = UINTVAL (op) & 0xffffffffULL;
5604 hi = UINTVAL (op) >> 32;
5605 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5606 }
5607
5608 /* Canonicalize a comparison so that we are more likely to recognize it.
5609 This can be done for a few constant compares, where we can make the
5610 immediate value easier to load. */
5611
5612 static void
5613 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5614 bool op0_preserve_value)
5615 {
5616 machine_mode mode;
5617 unsigned HOST_WIDE_INT i, maxval;
5618
5619 mode = GET_MODE (*op0);
5620 if (mode == VOIDmode)
5621 mode = GET_MODE (*op1);
5622
5623 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5624
5625 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5626 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5627 either reversed or (for constant OP1) adjusted to GE/LT.
5628 Similarly for GTU/LEU in Thumb mode. */
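  /* E.g. a DImode comparison x > 5 is rewritten below as x >= 6 so that
     it maps onto the available cmp/sbc sequence.  */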
5629 if (mode == DImode)
5630 {
5631
5632 if (*code == GT || *code == LE
5633 || *code == GTU || *code == LEU)
5634 {
5635 /* Missing comparison. First try to use an available
5636 comparison. */
5637 if (CONST_INT_P (*op1))
5638 {
5639 i = INTVAL (*op1);
5640 switch (*code)
5641 {
5642 case GT:
5643 case LE:
5644 if (i != maxval)
5645 {
5646 /* Try to convert to GE/LT, unless that would be more
5647 expensive. */
5648 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5649 && arm_const_double_prefer_rsbs_rsc (*op1))
5650 return;
5651 *op1 = GEN_INT (i + 1);
5652 *code = *code == GT ? GE : LT;
5653 }
5654 else
5655 {
5656 /* GT maxval is always false, LE maxval is always true.
5657 We can't fold that away here as we must make a
5658 comparison, but we can fold them to comparisons
5659 with the same result that can be handled:
5660 op0 GT maxval -> op0 LT minval
5661 op0 LE maxval -> op0 GE minval
5662 where minval = (-maxval - 1). */
5663 *op1 = GEN_INT (-maxval - 1);
5664 *code = *code == GT ? LT : GE;
5665 }
5666 return;
5667
5668 case GTU:
5669 case LEU:
5670 if (i != ~((unsigned HOST_WIDE_INT) 0))
5671 {
5672 /* Try to convert to GEU/LTU, unless that would
5673 be more expensive. */
5674 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5675 && arm_const_double_prefer_rsbs_rsc (*op1))
5676 return;
5677 *op1 = GEN_INT (i + 1);
5678 *code = *code == GTU ? GEU : LTU;
5679 }
5680 else
5681 {
5682 /* GTU ~0 is always false, LEU ~0 is always true.
5683 We can't fold that away here as we must make a
5684 comparison, but we can fold them to comparisons
5685 with the same result that can be handled:
5686 op0 GTU ~0 -> op0 LTU 0
5687 op0 LEU ~0 -> op0 GEU 0. */
5688 *op1 = const0_rtx;
5689 *code = *code == GTU ? LTU : GEU;
5690 }
5691 return;
5692
5693 default:
5694 gcc_unreachable ();
5695 }
5696 }
5697
5698 if (!op0_preserve_value)
5699 {
5700 std::swap (*op0, *op1);
5701 *code = (int)swap_condition ((enum rtx_code)*code);
5702 }
5703 }
5704 return;
5705 }
5706
5707 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5708 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5709 to facilitate possible combining with a cmp into 'ands'. */
5710 if (mode == SImode
5711 && GET_CODE (*op0) == ZERO_EXTEND
5712 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5713 && GET_MODE (XEXP (*op0, 0)) == QImode
5714 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5715 && subreg_lowpart_p (XEXP (*op0, 0))
5716 && *op1 == const0_rtx)
5717 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5718 GEN_INT (255));
5719
5720 /* Comparisons smaller than DImode. Only adjust comparisons against
5721 an out-of-range constant. */
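  /* E.g. x > 0x000fffff cannot use 0xfffff directly, but i + 1 == 0x100000
     is a valid immediate, so the test becomes x >= 0x100000 below.  */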
5722 if (!CONST_INT_P (*op1)
5723 || const_ok_for_arm (INTVAL (*op1))
5724 || const_ok_for_arm (- INTVAL (*op1)))
5725 return;
5726
5727 i = INTVAL (*op1);
5728
5729 switch (*code)
5730 {
5731 case EQ:
5732 case NE:
5733 return;
5734
5735 case GT:
5736 case LE:
5737 if (i != maxval
5738 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5739 {
5740 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5741 *code = *code == GT ? GE : LT;
5742 return;
5743 }
5744 break;
5745
5746 case GE:
5747 case LT:
5748 if (i != ~maxval
5749 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5750 {
5751 *op1 = GEN_INT (i - 1);
5752 *code = *code == GE ? GT : LE;
5753 return;
5754 }
5755 break;
5756
5757 case GTU:
5758 case LEU:
5759 if (i != ~((unsigned HOST_WIDE_INT) 0)
5760 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5761 {
5762 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5763 *code = *code == GTU ? GEU : LTU;
5764 return;
5765 }
5766 break;
5767
5768 case GEU:
5769 case LTU:
5770 if (i != 0
5771 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5772 {
5773 *op1 = GEN_INT (i - 1);
5774 *code = *code == GEU ? GTU : LEU;
5775 return;
5776 }
5777 break;
5778
5779 default:
5780 gcc_unreachable ();
5781 }
5782 }
5783
5784
5785 /* Define how to find the value returned by a function. */
5786
5787 static rtx
5788 arm_function_value(const_tree type, const_tree func,
5789 bool outgoing ATTRIBUTE_UNUSED)
5790 {
5791 machine_mode mode;
5792 int unsignedp ATTRIBUTE_UNUSED;
5793 rtx r ATTRIBUTE_UNUSED;
5794
5795 mode = TYPE_MODE (type);
5796
5797 if (TARGET_AAPCS_BASED)
5798 return aapcs_allocate_return_reg (mode, type, func);
5799
5800 /* Promote integer types. */
5801 if (INTEGRAL_TYPE_P (type))
5802 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5803
5804   /* Promote small structs returned in a register to full-word size
5805 for big-endian AAPCS. */
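  /* E.g. a 3-byte structure is padded up to 4 bytes here and returned
     in SImode.  */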
5806 if (arm_return_in_msb (type))
5807 {
5808 HOST_WIDE_INT size = int_size_in_bytes (type);
5809 if (size % UNITS_PER_WORD != 0)
5810 {
5811 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5812 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5813 }
5814 }
5815
5816 return arm_libcall_value_1 (mode);
5817 }
5818
5819 /* libcall hashtable helpers. */
5820
5821 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5822 {
5823 static inline hashval_t hash (const rtx_def *);
5824 static inline bool equal (const rtx_def *, const rtx_def *);
5825 static inline void remove (rtx_def *);
5826 };
5827
5828 inline bool
5829 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5830 {
5831 return rtx_equal_p (p1, p2);
5832 }
5833
5834 inline hashval_t
5835 libcall_hasher::hash (const rtx_def *p1)
5836 {
5837 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5838 }
5839
5840 typedef hash_table<libcall_hasher> libcall_table_type;
5841
5842 static void
5843 add_libcall (libcall_table_type *htab, rtx libcall)
5844 {
5845 *htab->find_slot (libcall, INSERT) = libcall;
5846 }
5847
5848 static bool
5849 arm_libcall_uses_aapcs_base (const_rtx libcall)
5850 {
5851 static bool init_done = false;
5852 static libcall_table_type *libcall_htab = NULL;
5853
5854 if (!init_done)
5855 {
5856 init_done = true;
5857
5858 libcall_htab = new libcall_table_type (31);
5859 add_libcall (libcall_htab,
5860 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5861 add_libcall (libcall_htab,
5862 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5863 add_libcall (libcall_htab,
5864 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5865 add_libcall (libcall_htab,
5866 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5867
5868 add_libcall (libcall_htab,
5869 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5870 add_libcall (libcall_htab,
5871 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5872 add_libcall (libcall_htab,
5873 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5874 add_libcall (libcall_htab,
5875 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5876
5877 add_libcall (libcall_htab,
5878 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5879 add_libcall (libcall_htab,
5880 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5881 add_libcall (libcall_htab,
5882 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5883 add_libcall (libcall_htab,
5884 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5885 add_libcall (libcall_htab,
5886 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5887 add_libcall (libcall_htab,
5888 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5889 add_libcall (libcall_htab,
5890 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5891 add_libcall (libcall_htab,
5892 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5893 add_libcall (libcall_htab,
5894 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5895 add_libcall (libcall_htab,
5896 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5897
5898 /* Values from double-precision helper functions are returned in core
5899 registers if the selected core only supports single-precision
5900 arithmetic, even if we are using the hard-float ABI. The same is
5901 true for single-precision helpers except in case of MVE, because in
5902 MVE we will be using the hard-float ABI on a CPU which doesn't support
5903 single-precision operations in hardware. In MVE the following check
5904 enables use of emulation for the single-precision arithmetic
5905 operations. */
5906 if (TARGET_HAVE_MVE)
5907 {
5908 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5909 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5910 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5911 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5912 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5913 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5914 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5915 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5916 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5917 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5918 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5919 }
5920 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5921 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5922 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5923 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5924 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5925 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5926 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5927 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5928 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5929 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5930 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5931 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5932 SFmode));
5933 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5934 DFmode));
5935 add_libcall (libcall_htab,
5936 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5937 }
5938
5939 return libcall && libcall_htab->find (libcall) != NULL;
5940 }
5941
5942 static rtx
5943 arm_libcall_value_1 (machine_mode mode)
5944 {
5945 if (TARGET_AAPCS_BASED)
5946 return aapcs_libcall_value (mode);
5947 else if (TARGET_IWMMXT_ABI
5948 && arm_vector_mode_supported_p (mode))
5949 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5950 else
5951 return gen_rtx_REG (mode, ARG_REGISTER (1));
5952 }
5953
5954 /* Define how to find the value returned by a library function
5955 assuming the value has mode MODE. */
5956
5957 static rtx
5958 arm_libcall_value (machine_mode mode, const_rtx libcall)
5959 {
5960 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5961 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5962 {
5963 /* The following libcalls return their result in integer registers,
5964 even though they return a floating point value. */
5965 if (arm_libcall_uses_aapcs_base (libcall))
5966         return gen_rtx_REG (mode, ARG_REGISTER (1));
5967
5968 }
5969
5970 return arm_libcall_value_1 (mode);
5971 }
5972
5973 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5974
5975 static bool
5976 arm_function_value_regno_p (const unsigned int regno)
5977 {
5978 if (regno == ARG_REGISTER (1)
5979 || (TARGET_32BIT
5980 && TARGET_AAPCS_BASED
5981 && TARGET_HARD_FLOAT
5982 && regno == FIRST_VFP_REGNUM)
5983 || (TARGET_IWMMXT_ABI
5984 && regno == FIRST_IWMMXT_REGNUM))
5985 return true;
5986
5987 return false;
5988 }
5989
5990 /* Determine the amount of memory needed to store the possible return
5991 registers of an untyped call. */
5992 int
5993 arm_apply_result_size (void)
5994 {
5995 int size = 16;
5996
5997 if (TARGET_32BIT)
5998 {
5999 if (TARGET_HARD_FLOAT_ABI)
6000 size += 32;
6001 if (TARGET_IWMMXT_ABI)
6002 size += 8;
6003 }
6004
6005 return size;
6006 }
6007
6008 /* Decide whether TYPE should be returned in memory (true)
6009 or in a register (false). FNTYPE is the type of the function making
6010 the call. */
6011 static bool
6012 arm_return_in_memory (const_tree type, const_tree fntype)
6013 {
6014 HOST_WIDE_INT size;
6015
6016 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6017
6018 if (TARGET_AAPCS_BASED)
6019 {
6020       /* Simple, non-aggregate types (i.e. not including vectors and
6021 complex) are always returned in a register (or registers).
6022 We don't care about which register here, so we can short-cut
6023 some of the detail. */
6024 if (!AGGREGATE_TYPE_P (type)
6025 && TREE_CODE (type) != VECTOR_TYPE
6026 && TREE_CODE (type) != COMPLEX_TYPE)
6027 return false;
6028
6029 /* Any return value that is no larger than one word can be
6030 returned in r0. */
6031 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6032 return false;
6033
6034 /* Check any available co-processors to see if they accept the
6035 type as a register candidate (VFP, for example, can return
6036 some aggregates in consecutive registers). These aren't
6037 available if the call is variadic. */
6038 if (aapcs_select_return_coproc (type, fntype) >= 0)
6039 return false;
6040
6041 /* Vector values should be returned using ARM registers, not
6042 memory (unless they're over 16 bytes, which will break since
6043 we only have four call-clobbered registers to play with). */
6044 if (TREE_CODE (type) == VECTOR_TYPE)
6045 return (size < 0 || size > (4 * UNITS_PER_WORD));
6046
6047 /* The rest go in memory. */
6048 return true;
6049 }
6050
6051 if (TREE_CODE (type) == VECTOR_TYPE)
6052 return (size < 0 || size > (4 * UNITS_PER_WORD));
6053
6054 if (!AGGREGATE_TYPE_P (type) &&
6055 (TREE_CODE (type) != VECTOR_TYPE))
6056 /* All simple types are returned in registers. */
6057 return false;
6058
6059 if (arm_abi != ARM_ABI_APCS)
6060 {
6061 /* ATPCS and later return aggregate types in memory only if they are
6062 larger than a word (or are variable size). */
6063 return (size < 0 || size > UNITS_PER_WORD);
6064 }
6065
6066 /* For the arm-wince targets we choose to be compatible with Microsoft's
6067 ARM and Thumb compilers, which always return aggregates in memory. */
6068 #ifndef ARM_WINCE
6069 /* All structures/unions bigger than one word are returned in memory.
6070 Also catch the case where int_size_in_bytes returns -1. In this case
6071 the aggregate is either huge or of variable size, and in either case
6072 we will want to return it via memory and not in a register. */
6073 if (size < 0 || size > UNITS_PER_WORD)
6074 return true;
6075
6076 if (TREE_CODE (type) == RECORD_TYPE)
6077 {
6078 tree field;
6079
6080 /* For a struct the APCS says that we only return in a register
6081 if the type is 'integer like' and every addressable element
6082 has an offset of zero. For practical purposes this means
6083 that the structure can have at most one non bit-field element
6084 and that this element must be the first one in the structure. */
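      /* E.g. under these APCS rules struct { short a; unsigned b : 16; }
	 may come back in r0, whereas struct { short a; short b; } must be
	 returned in memory because its second element is addressable.  */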
6085
6086 /* Find the first field, ignoring non FIELD_DECL things which will
6087 have been created by C++. */
6088 /* NOTE: This code is deprecated and has not been updated to handle
6089 DECL_FIELD_ABI_IGNORED. */
6090 for (field = TYPE_FIELDS (type);
6091 field && TREE_CODE (field) != FIELD_DECL;
6092 field = DECL_CHAIN (field))
6093 continue;
6094
6095 if (field == NULL)
6096 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6097
6098 /* Check that the first field is valid for returning in a register. */
6099
6100       /* ... Floats are not allowed.  */
6101 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6102 return true;
6103
6104 /* ... Aggregates that are not themselves valid for returning in
6105 a register are not allowed. */
6106 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6107 return true;
6108
6109 /* Now check the remaining fields, if any. Only bitfields are allowed,
6110 since they are not addressable. */
6111 for (field = DECL_CHAIN (field);
6112 field;
6113 field = DECL_CHAIN (field))
6114 {
6115 if (TREE_CODE (field) != FIELD_DECL)
6116 continue;
6117
6118 if (!DECL_BIT_FIELD_TYPE (field))
6119 return true;
6120 }
6121
6122 return false;
6123 }
6124
6125 if (TREE_CODE (type) == UNION_TYPE)
6126 {
6127 tree field;
6128
6129 /* Unions can be returned in registers if every element is
6130 integral, or can be returned in an integer register. */
6131 for (field = TYPE_FIELDS (type);
6132 field;
6133 field = DECL_CHAIN (field))
6134 {
6135 if (TREE_CODE (field) != FIELD_DECL)
6136 continue;
6137
6138 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6139 return true;
6140
6141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6142 return true;
6143 }
6144
6145 return false;
6146 }
6147 #endif /* not ARM_WINCE */
6148
6149 /* Return all other types in memory. */
6150 return true;
6151 }
6152
6153 const struct pcs_attribute_arg
6154 {
6155 const char *arg;
6156 enum arm_pcs value;
6157 } pcs_attribute_args[] =
6158 {
6159 {"aapcs", ARM_PCS_AAPCS},
6160 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6161 #if 0
6162 /* We could recognize these, but changes would be needed elsewhere
6163 * to implement them. */
6164 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6165 {"atpcs", ARM_PCS_ATPCS},
6166 {"apcs", ARM_PCS_APCS},
6167 #endif
6168 {NULL, ARM_PCS_UNKNOWN}
6169 };
6170
6171 static enum arm_pcs
6172 arm_pcs_from_attribute (tree attr)
6173 {
6174 const struct pcs_attribute_arg *ptr;
6175 const char *arg;
6176
6177 /* Get the value of the argument. */
6178 if (TREE_VALUE (attr) == NULL_TREE
6179 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6180 return ARM_PCS_UNKNOWN;
6181
6182 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6183
6184 /* Check it against the list of known arguments. */
6185 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6186 if (streq (arg, ptr->arg))
6187 return ptr->value;
6188
6189   /* An unrecognized PCS variant.  */
6190 return ARM_PCS_UNKNOWN;
6191 }
6192
6193 /* Get the PCS variant to use for this call. TYPE is the function's type
6194    specification, DECL is the specific declaration.  DECL may be null if
6195 the call could be indirect or if this is a library call. */
6196 static enum arm_pcs
6197 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6198 {
6199 bool user_convention = false;
6200 enum arm_pcs user_pcs = arm_pcs_default;
6201 tree attr;
6202
6203 gcc_assert (type);
6204
6205 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6206 if (attr)
6207 {
6208 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6209 user_convention = true;
6210 }
6211
6212 if (TARGET_AAPCS_BASED)
6213 {
6214 /* Detect varargs functions. These always use the base rules
6215 (no argument is ever a candidate for a co-processor
6216 register). */
6217 bool base_rules = stdarg_p (type);
6218
6219 if (user_convention)
6220 {
6221 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6222 sorry ("non-AAPCS derived PCS variant");
6223 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6224 error ("variadic functions must use the base AAPCS variant");
6225 }
6226
6227 if (base_rules)
6228 return ARM_PCS_AAPCS;
6229 else if (user_convention)
6230 return user_pcs;
6231 #if 0
6232 /* Unfortunately, this is not safe and can lead to wrong code
6233 being generated (PR96882). Not all calls into the back-end
6234 pass the DECL, so it is unsafe to make any PCS-changing
6235 decisions based on it. In particular the RETURN_IN_MEMORY
6236 hook is only ever passed a TYPE. This needs revisiting to
6237 see if there are any partial improvements that can be
6238 re-enabled. */
6239 else if (decl && flag_unit_at_a_time)
6240 {
6241 /* Local functions never leak outside this compilation unit,
6242 so we are free to use whatever conventions are
6243 appropriate. */
6244 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6245 cgraph_node *local_info_node
6246 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6247 if (local_info_node && local_info_node->local)
6248 return ARM_PCS_AAPCS_LOCAL;
6249 }
6250 #endif
6251 }
6252 else if (user_convention && user_pcs != arm_pcs_default)
6253 sorry ("PCS variant");
6254
6255 /* For everything else we use the target's default. */
6256 return arm_pcs_default;
6257 }
6258
6259
6260 static void
6261 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6262 const_tree fntype ATTRIBUTE_UNUSED,
6263 rtx libcall ATTRIBUTE_UNUSED,
6264 const_tree fndecl ATTRIBUTE_UNUSED)
6265 {
6266 /* Record the unallocated VFP registers. */
6267 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6268 pcum->aapcs_vfp_reg_alloc = 0;
6269 }
6270
6271 /* Bitmasks that indicate whether earlier versions of GCC would have
6272 taken a different path through the ABI logic. This should result in
6273 a -Wpsabi warning if the earlier path led to a different ABI decision.
6274
6275 WARN_PSABI_EMPTY_CXX17_BASE
6276 Indicates that the type includes an artificial empty C++17 base field
6277 that, prior to GCC 10.1, would prevent the type from being treated as
6278 a HFA or HVA. See PR94711 for details.
6279
6280 WARN_PSABI_NO_UNIQUE_ADDRESS
6281 Indicates that the type includes an empty [[no_unique_address]] field
6282 that, prior to GCC 10.1, would prevent the type from being treated as
6283 a HFA or HVA. */
6284 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6285 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6286 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6287
6288 /* Walk down the type tree of TYPE counting consecutive base elements.
6289 If *MODEP is VOIDmode, then set it to the first valid floating point
6290 type. If a non-floating point type is found, or if a floating point
6291 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6292 otherwise return the count in the sub-tree.
6293
6294 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6295 function has changed its behavior relative to earlier versions of GCC.
6296 Normally the argument should be nonnull and point to a zero-initialized
6297 variable. The function then records whether the ABI decision might
6298 be affected by a known fix to the ABI logic, setting the associated
6299 WARN_PSABI_* bits if so.
6300
6301 When the argument is instead a null pointer, the function tries to
6302 simulate the behavior of GCC before all such ABI fixes were made.
6303 This is useful to check whether the function returns something
6304 different after the ABI fixes. */
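/* E.g. for struct { float x, y, z; } this returns 3 with *MODEP set to
   SFmode (a homogeneous aggregate of three floats), whereas
   struct { float x; double y; } returns -1 because the element modes
   differ.  */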
6305 static int
6306 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6307 unsigned int *warn_psabi_flags)
6308 {
6309 machine_mode mode;
6310 HOST_WIDE_INT size;
6311
6312 switch (TREE_CODE (type))
6313 {
6314 case REAL_TYPE:
6315 mode = TYPE_MODE (type);
6316 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6317 return -1;
6318
6319 if (*modep == VOIDmode)
6320 *modep = mode;
6321
6322 if (*modep == mode)
6323 return 1;
6324
6325 break;
6326
6327 case COMPLEX_TYPE:
6328 mode = TYPE_MODE (TREE_TYPE (type));
6329 if (mode != DFmode && mode != SFmode)
6330 return -1;
6331
6332 if (*modep == VOIDmode)
6333 *modep = mode;
6334
6335 if (*modep == mode)
6336 return 2;
6337
6338 break;
6339
6340 case VECTOR_TYPE:
6341 /* Use V2SImode and V4SImode as representatives of all 64-bit
6342 and 128-bit vector types, whether or not those modes are
6343 supported with the present options. */
6344 size = int_size_in_bytes (type);
6345 switch (size)
6346 {
6347 case 8:
6348 mode = V2SImode;
6349 break;
6350 case 16:
6351 mode = V4SImode;
6352 break;
6353 default:
6354 return -1;
6355 }
6356
6357 if (*modep == VOIDmode)
6358 *modep = mode;
6359
6360 /* Vector modes are considered to be opaque: two vectors are
6361 equivalent for the purposes of being homogeneous aggregates
6362 if they are the same size. */
6363 if (*modep == mode)
6364 return 1;
6365
6366 break;
6367
6368 case ARRAY_TYPE:
6369 {
6370 int count;
6371 tree index = TYPE_DOMAIN (type);
6372
6373 /* Can't handle incomplete types nor sizes that are not
6374 fixed. */
6375 if (!COMPLETE_TYPE_P (type)
6376 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6377 return -1;
6378
6379 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6380 warn_psabi_flags);
6381 if (count == -1
6382 || !index
6383 || !TYPE_MAX_VALUE (index)
6384 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6385 || !TYPE_MIN_VALUE (index)
6386 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6387 || count < 0)
6388 return -1;
6389
6390 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6391 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6392
6393 /* There must be no padding. */
6394 if (wi::to_wide (TYPE_SIZE (type))
6395 != count * GET_MODE_BITSIZE (*modep))
6396 return -1;
6397
6398 return count;
6399 }
6400
6401 case RECORD_TYPE:
6402 {
6403 int count = 0;
6404 int sub_count;
6405 tree field;
6406
6407 /* Can't handle incomplete types nor sizes that are not
6408 fixed. */
6409 if (!COMPLETE_TYPE_P (type)
6410 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6411 return -1;
6412
6413 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6414 {
6415 if (TREE_CODE (field) != FIELD_DECL)
6416 continue;
6417
6418 if (DECL_FIELD_ABI_IGNORED (field))
6419 {
6420 /* See whether this is something that earlier versions of
6421 GCC failed to ignore. */
6422 unsigned int flag;
6423 if (lookup_attribute ("no_unique_address",
6424 DECL_ATTRIBUTES (field)))
6425 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6426 else if (cxx17_empty_base_field_p (field))
6427 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6428 else
6429 /* No compatibility problem. */
6430 continue;
6431
6432 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6433 if (warn_psabi_flags)
6434 {
6435 *warn_psabi_flags |= flag;
6436 continue;
6437 }
6438 }
6439 /* A zero-width bitfield may affect layout in some
6440 circumstances, but adds no members. The determination
6441 of whether or not a type is an HFA is performed after
6442 layout is complete, so if the type still looks like an
6443 HFA afterwards, it is still classed as one. This is
6444 potentially an ABI break for the hard-float ABI. */
6445 else if (DECL_BIT_FIELD (field)
6446 && integer_zerop (DECL_SIZE (field)))
6447 {
6448              /* Prior to GCC-12 these fields were stripped early,
6449 hiding them from the back-end entirely and
6450 resulting in the correct behaviour for argument
6451 passing. Simulate that old behaviour without
6452 generating a warning. */
6453 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6454 continue;
6455 if (warn_psabi_flags)
6456 {
6457 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6458 continue;
6459 }
6460 }
6461
6462 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6463 warn_psabi_flags);
6464 if (sub_count < 0)
6465 return -1;
6466 count += sub_count;
6467 }
6468
6469 /* There must be no padding. */
6470 if (wi::to_wide (TYPE_SIZE (type))
6471 != count * GET_MODE_BITSIZE (*modep))
6472 return -1;
6473
6474 return count;
6475 }
6476
6477 case UNION_TYPE:
6478 case QUAL_UNION_TYPE:
6479 {
6480 /* These aren't very interesting except in a degenerate case. */
6481 int count = 0;
6482 int sub_count;
6483 tree field;
6484
6485 /* Can't handle incomplete types nor sizes that are not
6486 fixed. */
6487 if (!COMPLETE_TYPE_P (type)
6488 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6489 return -1;
6490
6491 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6492 {
6493 if (TREE_CODE (field) != FIELD_DECL)
6494 continue;
6495
6496 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6497 warn_psabi_flags);
6498 if (sub_count < 0)
6499 return -1;
6500 count = count > sub_count ? count : sub_count;
6501 }
6502
6503 /* There must be no padding. */
6504 if (wi::to_wide (TYPE_SIZE (type))
6505 != count * GET_MODE_BITSIZE (*modep))
6506 return -1;
6507
6508 return count;
6509 }
6510
6511 default:
6512 break;
6513 }
6514
6515 return -1;
6516 }
6517
6518 /* Return true if PCS_VARIANT should use VFP registers. */
6519 static bool
6520 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6521 {
6522 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6523 {
6524 static bool seen_thumb1_vfp = false;
6525
6526 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6527 {
6528 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6529 /* sorry() is not immediately fatal, so only display this once. */
6530 seen_thumb1_vfp = true;
6531 }
6532
6533 return true;
6534 }
6535
6536 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6537 return false;
6538
6539 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6540 (TARGET_VFP_DOUBLE || !is_double));
6541 }
6542
6543 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6544 suitable for passing or returning in VFP registers for the PCS
6545 variant selected. If it is, then *BASE_MODE is updated to contain
6546 a machine mode describing each element of the argument's type and
6547 *COUNT to hold the number of such elements. */
6548 static bool
6549 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6550 machine_mode mode, const_tree type,
6551 machine_mode *base_mode, int *count)
6552 {
6553 machine_mode new_mode = VOIDmode;
6554
6555 /* If we have the type information, prefer that to working things
6556 out from the mode. */
6557 if (type)
6558 {
6559 unsigned int warn_psabi_flags = 0;
6560 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6561 &warn_psabi_flags);
6562 if (ag_count > 0 && ag_count <= 4)
6563 {
6564 static unsigned last_reported_type_uid;
6565 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6566 int alt;
6567 if (warn_psabi
6568 && warn_psabi_flags
6569 && uid != last_reported_type_uid
6570 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6571 != ag_count))
6572 {
6573 const char *url10
6574 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6575 const char *url12
6576 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6577 gcc_assert (alt == -1);
6578 last_reported_type_uid = uid;
6579 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6580 qualification. */
6581 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6582 inform (input_location, "parameter passing for argument of "
6583 "type %qT with %<[[no_unique_address]]%> members "
6584 "changed %{in GCC 10.1%}",
6585 TYPE_MAIN_VARIANT (type), url10);
6586 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6587 inform (input_location, "parameter passing for argument of "
6588 "type %qT when C++17 is enabled changed to match "
6589 "C++14 %{in GCC 10.1%}",
6590 TYPE_MAIN_VARIANT (type), url10);
6591 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6592 inform (input_location, "parameter passing for argument of "
6593 "type %qT changed %{in GCC 12.1%}",
6594 TYPE_MAIN_VARIANT (type), url12);
6595 }
6596 *count = ag_count;
6597 }
6598 else
6599 return false;
6600 }
6601 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6602 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6603 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6604 {
6605 *count = 1;
6606 new_mode = mode;
6607 }
6608 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6609 {
6610 *count = 2;
6611 new_mode = (mode == DCmode ? DFmode : SFmode);
6612 }
6613 else
6614 return false;
6615
6616
6617 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6618 return false;
6619
6620 *base_mode = new_mode;
6621
6622 if (TARGET_GENERAL_REGS_ONLY)
6623 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6624 type);
6625
6626 return true;
6627 }
6628
6629 static bool
6630 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6631 machine_mode mode, const_tree type)
6632 {
6633 int count ATTRIBUTE_UNUSED;
6634 machine_mode ag_mode ATTRIBUTE_UNUSED;
6635
6636 if (!use_vfp_abi (pcs_variant, false))
6637 return false;
6638 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6639 &ag_mode, &count);
6640 }
6641
6642 static bool
6643 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6644 const_tree type)
6645 {
6646 if (!use_vfp_abi (pcum->pcs_variant, false))
6647 return false;
6648
6649 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6650 &pcum->aapcs_vfp_rmode,
6651 &pcum->aapcs_vfp_rcount);
6652 }
6653
6654 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6655 for the behaviour of this function. */
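/* E.g. a homogeneous aggregate of two doubles gives shift == 2 and
   mask == 0xf below, so it needs four consecutive free single-precision
   slots starting at an even register number (d0/d1, i.e. s0-s3, when
   they are still free).  */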
6656
6657 static bool
6658 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6659 const_tree type ATTRIBUTE_UNUSED)
6660 {
6661 int rmode_size
6662 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6663 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6664 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6665 int regno;
6666
6667 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6668 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6669 {
6670 pcum->aapcs_vfp_reg_alloc = mask << regno;
6671 if (mode == BLKmode
6672 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6673 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6674 {
6675 int i;
6676 int rcount = pcum->aapcs_vfp_rcount;
6677 int rshift = shift;
6678 machine_mode rmode = pcum->aapcs_vfp_rmode;
6679 rtx par;
6680 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6681 {
6682 /* Avoid using unsupported vector modes. */
6683 if (rmode == V2SImode)
6684 rmode = DImode;
6685 else if (rmode == V4SImode)
6686 {
6687 rmode = DImode;
6688 rcount *= 2;
6689 rshift /= 2;
6690 }
6691 }
6692 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6693 for (i = 0; i < rcount; i++)
6694 {
6695 rtx tmp = gen_rtx_REG (rmode,
6696 FIRST_VFP_REGNUM + regno + i * rshift);
6697 tmp = gen_rtx_EXPR_LIST
6698 (VOIDmode, tmp,
6699 GEN_INT (i * GET_MODE_SIZE (rmode)));
6700 XVECEXP (par, 0, i) = tmp;
6701 }
6702
6703 pcum->aapcs_reg = par;
6704 }
6705 else
6706 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6707 return true;
6708 }
6709 return false;
6710 }
6711
6712 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6713 comment there for the behaviour of this function. */
6714
6715 static rtx
6716 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6717 machine_mode mode,
6718 const_tree type ATTRIBUTE_UNUSED)
6719 {
6720 if (!use_vfp_abi (pcs_variant, false))
6721 return NULL;
6722
6723 if (mode == BLKmode
6724 || (GET_MODE_CLASS (mode) == MODE_INT
6725 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6726 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6727 {
6728 int count;
6729 machine_mode ag_mode;
6730 int i;
6731 rtx par;
6732 int shift;
6733
6734 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6735 &ag_mode, &count);
6736
6737 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6738 {
6739 if (ag_mode == V2SImode)
6740 ag_mode = DImode;
6741 else if (ag_mode == V4SImode)
6742 {
6743 ag_mode = DImode;
6744 count *= 2;
6745 }
6746 }
6747       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6748 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6749 for (i = 0; i < count; i++)
6750 {
6751 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6752 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6753 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6754 XVECEXP (par, 0, i) = tmp;
6755 }
6756
6757 return par;
6758 }
6759
6760 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6761 }
6762
6763 static void
6764 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6765 machine_mode mode ATTRIBUTE_UNUSED,
6766 const_tree type ATTRIBUTE_UNUSED)
6767 {
6768 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6769 pcum->aapcs_vfp_reg_alloc = 0;
6770 return;
6771 }
6772
6773 #define AAPCS_CP(X) \
6774 { \
6775 aapcs_ ## X ## _cum_init, \
6776 aapcs_ ## X ## _is_call_candidate, \
6777 aapcs_ ## X ## _allocate, \
6778 aapcs_ ## X ## _is_return_candidate, \
6779 aapcs_ ## X ## _allocate_return_reg, \
6780 aapcs_ ## X ## _advance \
6781 }
6782
6783 /* Table of co-processors that can be used to pass arguments in
6784 registers. Ideally no argument should be a candidate for more than
6785 one co-processor table entry, but the table is processed in order
6786 and stops after the first match. If that entry then fails to put
6787 the argument into a co-processor register, the argument will go on
6788 the stack. */
6789 static struct
6790 {
6791 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6792 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6793
6794 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6795 BLKmode) is a candidate for this co-processor's registers; this
6796 function should ignore any position-dependent state in
6797 CUMULATIVE_ARGS and only use call-type dependent information. */
6798 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6799
6800 /* Return true if the argument does get a co-processor register; it
6801 should set aapcs_reg to an RTX of the register allocated as is
6802 required for a return from FUNCTION_ARG. */
6803 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6804
6805 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6806 be returned in this co-processor's registers. */
6807 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6808
6809 /* Allocate and return an RTX element to hold the return type of a call. This
6810 routine must not fail and will only be called if is_return_candidate
6811 returned true with the same parameters. */
6812 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6813
6814 /* Finish processing this argument and prepare to start processing
6815 the next one. */
6816 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6817 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6818 {
6819 AAPCS_CP(vfp)
6820 };
6821
6822 #undef AAPCS_CP
6823
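/* Return the index in aapcs_cp_arg_layout of the first co-processor
   slot whose is_call_candidate hook accepts an argument of MODE and
   TYPE, or -1 if no co-processor will take it.  */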
6824 static int
6825 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6826 const_tree type)
6827 {
6828 int i;
6829
6830 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6831 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6832 return i;
6833
6834 return -1;
6835 }
6836
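/* Return the index in aapcs_cp_arg_layout of the co-processor slot
   that can return a value of type TYPE for a call to a function of
   type (or decl) FNTYPE, or -1 if no co-processor slot applies (for
   example, for the base AAPCS variant).  */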
6837 static int
6838 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6839 {
6840 /* We aren't passed a decl, so we can't check that a call is local.
6841 However, it isn't clear that that would be a win anyway, since it
6842 might limit some tail-calling opportunities. */
6843 enum arm_pcs pcs_variant;
6844
6845 if (fntype)
6846 {
6847 const_tree fndecl = NULL_TREE;
6848
6849 if (TREE_CODE (fntype) == FUNCTION_DECL)
6850 {
6851 fndecl = fntype;
6852 fntype = TREE_TYPE (fntype);
6853 }
6854
6855 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6856 }
6857 else
6858 pcs_variant = arm_pcs_default;
6859
6860 if (pcs_variant != ARM_PCS_AAPCS)
6861 {
6862 int i;
6863
6864 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6865 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6866 TYPE_MODE (type),
6867 type))
6868 return i;
6869 }
6870 return -1;
6871 }
6872
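/* Return an RTX describing where a value of mode MODE and type TYPE
   is returned under the AAPCS.  FNTYPE may be a function type or a
   FUNCTION_DECL; it is NULL_TREE for a libcall.  */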
6873 static rtx
6874 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6875 const_tree fntype)
6876 {
6877 /* We aren't passed a decl, so we can't check that a call is local.
6878 However, it isn't clear that that would be a win anyway, since it
6879 might limit some tail-calling opportunities. */
6880 enum arm_pcs pcs_variant;
6881 int unsignedp ATTRIBUTE_UNUSED;
6882
6883 if (fntype)
6884 {
6885 const_tree fndecl = NULL_TREE;
6886
6887 if (TREE_CODE (fntype) == FUNCTION_DECL)
6888 {
6889 fndecl = fntype;
6890 fntype = TREE_TYPE (fntype);
6891 }
6892
6893 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6894 }
6895 else
6896 pcs_variant = arm_pcs_default;
6897
6898 /* Promote integer types. */
6899 if (type && INTEGRAL_TYPE_P (type))
6900 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6901
6902 if (pcs_variant != ARM_PCS_AAPCS)
6903 {
6904 int i;
6905
6906 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6907 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6908 type))
6909 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6910 mode, type);
6911 }
6912
6913 /* Promotes small structs returned in a register to full-word size
6914 for big-endian AAPCS. */
6915 if (type && arm_return_in_msb (type))
6916 {
6917 HOST_WIDE_INT size = int_size_in_bytes (type);
6918 if (size % UNITS_PER_WORD != 0)
6919 {
6920 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6921 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6922 }
6923 }
6924
6925 return gen_rtx_REG (mode, R0_REGNUM);
6926 }
6927
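/* Return an RTX describing where a libcall value of mode MODE is
   returned under the AAPCS.  */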
6928 static rtx
6929 aapcs_libcall_value (machine_mode mode)
6930 {
6931 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6932 && GET_MODE_SIZE (mode) <= 4)
6933 mode = SImode;
6934
6935 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6936 }
6937
6938 /* Lay out a function argument using the AAPCS rules. The rule
6939 numbers referred to here are those in the AAPCS. */
6940 static void
6941 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6942 const_tree type, bool named)
6943 {
6944 int nregs, nregs2;
6945 int ncrn;
6946
6947 /* We only need to do this once per argument. */
6948 if (pcum->aapcs_arg_processed)
6949 return;
6950
6951 pcum->aapcs_arg_processed = true;
6952
6953 /* Special case: if named is false then we are handling an incoming
6954 anonymous argument which is on the stack. */
6955 if (!named)
6956 return;
6957
6958 /* Is this a potential co-processor register candidate? */
6959 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6960 {
6961 int slot = aapcs_select_call_coproc (pcum, mode, type);
6962 pcum->aapcs_cprc_slot = slot;
6963
6964 /* We don't have to apply any of the rules from part B of the
6965 preparation phase, these are handled elsewhere in the
6966 compiler. */
6967
6968 if (slot >= 0)
6969 {
6970 /* A Co-processor register candidate goes either in its own
6971 class of registers or on the stack. */
6972 if (!pcum->aapcs_cprc_failed[slot])
6973 {
6974 /* C1.cp - Try to allocate the argument to co-processor
6975 registers. */
6976 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6977 return;
6978
6979 /* C2.cp - Put the argument on the stack and note that we
6980 can't assign any more candidates in this slot. We also
6981 need to note that we have allocated stack space, so that
6982 we won't later try to split a non-cprc candidate between
6983 core registers and the stack. */
6984 pcum->aapcs_cprc_failed[slot] = true;
6985 pcum->can_split = false;
6986 }
6987
6988 /* We didn't get a register, so this argument goes on the
6989 stack. */
6990 gcc_assert (pcum->can_split == false);
6991 return;
6992 }
6993 }
6994
6995 /* C3 - For double-word aligned arguments, round the NCRN up to the
6996 next even number. */
6997 ncrn = pcum->aapcs_ncrn;
6998 if (ncrn & 1)
6999 {
7000 int res = arm_needs_doubleword_align (mode, type);
7001 /* Only warn during RTL expansion of call stmts, otherwise we would
7002 warn e.g. during gimplification even on functions that will be
7003 always inlined, and we'd warn multiple times. Don't warn when
7004 called in expand_function_start either, as we warn instead in
7005 arm_function_arg_boundary in that case. */
7006 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7007 inform (input_location, "parameter passing for argument of type "
7008 "%qT changed in GCC 7.1", type);
7009 else if (res > 0)
7010 ncrn++;
7011 }
7012
7013 nregs = ARM_NUM_REGS2 (mode, type);
7014
7015 /* Sigh, this test should really assert that nregs > 0, but a GCC
7016 extension allows empty structs and then gives them zero size; it
7017 then allows such a structure to be passed by value. For some of
7018 the code below we have to pretend that such an argument has
7019 non-zero size so that we 'locate' it correctly either in
7020 registers or on the stack. */
7021 gcc_assert (nregs >= 0);
7022
7023 nregs2 = nregs ? nregs : 1;
7024
7025 /* C4 - Argument fits entirely in core registers. */
7026 if (ncrn + nregs2 <= NUM_ARG_REGS)
7027 {
7028 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7029 pcum->aapcs_next_ncrn = ncrn + nregs;
7030 return;
7031 }
7032
7033 /* C5 - Some core registers left and there are no arguments already
7034 on the stack: split this argument between the remaining core
7035 registers and the stack. */
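/* For example, with r0-r2 already used, a 16-byte structure that only
   needs word alignment gets its first word in r3 (aapcs_partial == 4)
   and its remaining 12 bytes on the stack.  */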
7036 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7037 {
7038 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7039 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7040 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7041 return;
7042 }
7043
7044 /* C6 - NCRN is set to 4. */
7045 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7046
7047 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7048 return;
7049 }
7050
7051 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7052 for a call to a function whose data type is FNTYPE.
7053 For a library call, FNTYPE is NULL. */
7054 void
7055 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7056 rtx libname,
7057 tree fndecl ATTRIBUTE_UNUSED)
7058 {
7059 /* Long call handling. */
7060 if (fntype)
7061 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7062 else
7063 pcum->pcs_variant = arm_pcs_default;
7064
7065 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7066 {
7067 if (arm_libcall_uses_aapcs_base (libname))
7068 pcum->pcs_variant = ARM_PCS_AAPCS;
7069
7070 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7071 pcum->aapcs_reg = NULL_RTX;
7072 pcum->aapcs_partial = 0;
7073 pcum->aapcs_arg_processed = false;
7074 pcum->aapcs_cprc_slot = -1;
7075 pcum->can_split = true;
7076
7077 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7078 {
7079 int i;
7080
7081 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7082 {
7083 pcum->aapcs_cprc_failed[i] = false;
7084 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7085 }
7086 }
7087 return;
7088 }
7089
7090 /* Legacy ABIs */
7091
7092 /* On the ARM, the offset starts at 0. */
7093 pcum->nregs = 0;
7094 pcum->iwmmxt_nregs = 0;
7095 pcum->can_split = true;
7096
7097 /* Varargs vectors are treated the same as long long.
7098 named_count avoids having to change the way arm handles 'named'. */
7099 pcum->named_count = 0;
7100 pcum->nargs = 0;
7101
7102 if (TARGET_REALLY_IWMMXT && fntype)
7103 {
7104 tree fn_arg;
7105
7106 for (fn_arg = TYPE_ARG_TYPES (fntype);
7107 fn_arg;
7108 fn_arg = TREE_CHAIN (fn_arg))
7109 pcum->named_count += 1;
7110
7111 if (! pcum->named_count)
7112 pcum->named_count = INT_MAX;
7113 }
7114 }
7115
7116 /* Return 2 if double word alignment is required for argument passing,
7117 but wasn't required before the fix for PR88469.
7118 Return 1 if double word alignment is required for argument passing.
7119 Return -1 if double word alignment used to be required for argument
7120 passing before PR77728 ABI fix, but is not required anymore.
7121 Return 0 if double word alignment is not required and wasn't required
7122 before either. */
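/* For example, under AAPCS a "long long" argument (or an aggregate
   containing one) requires 8-byte alignment and therefore starts in an
   even-numbered core register, whereas a plain "int" only needs
   PARM_BOUNDARY.  */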
7123 static int
7124 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7125 {
7126 if (!type)
7127 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7128
7129 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7130 if (!AGGREGATE_TYPE_P (type))
7131 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7132
7133 /* Array types: Use member alignment of element type. */
7134 if (TREE_CODE (type) == ARRAY_TYPE)
7135 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7136
7137 int ret = 0;
7138 int ret2 = 0;
7139 /* Record/aggregate types: Use greatest member alignment of any member.
7140
7141 Note that we explicitly consider zero-sized fields here, even though
7142 they don't map to AAPCS machine types. For example, in:
7143
7144 struct __attribute__((aligned(8))) empty {};
7145
7146 struct s {
7147 [[no_unique_address]] empty e;
7148 int x;
7149 };
7150
7151 "s" contains only one Fundamental Data Type (the int field)
7152 but gains 8-byte alignment and size thanks to "e". */
7153 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7154 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7155 {
7156 if (TREE_CODE (field) == FIELD_DECL)
7157 return 1;
7158 else
7159 /* Before PR77728 fix, we were incorrectly considering also
7160 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7161 Make sure we can warn about that with -Wpsabi. */
7162 ret = -1;
7163 }
7164 else if (TREE_CODE (field) == FIELD_DECL
7165 && DECL_BIT_FIELD_TYPE (field)
7166 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7167 ret2 = 1;
7168
7169 if (ret2)
7170 return 2;
7171
7172 return ret;
7173 }
7174
7175
7176 /* Determine where to put an argument to a function.
7177 Value is zero to push the argument on the stack,
7178 or a hard register in which to store the argument.
7179
7180 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7181 the preceding args and about the function being called.
7182 ARG is a description of the argument.
7183
7184 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7185 other arguments are passed on the stack. If (NAMED == 0) (which happens
7186 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7187 defined), say it is passed on the stack (function_prologue will
7188 indeed make it be passed on the stack if necessary). */
7189
7190 static rtx
7191 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7192 {
7193 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7194 int nregs;
7195
7196 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7197 a call insn (op3 of a call_value insn). */
7198 if (arg.end_marker_p ())
7199 return const0_rtx;
7200
7201 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7202 {
7203 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7204 return pcum->aapcs_reg;
7205 }
7206
7207 /* Varargs vectors are treated the same as long long.
7208 named_count avoids having to change the way arm handles 'named'. */
7209 if (TARGET_IWMMXT_ABI
7210 && arm_vector_mode_supported_p (arg.mode)
7211 && pcum->named_count > pcum->nargs + 1)
7212 {
7213 if (pcum->iwmmxt_nregs <= 9)
7214 return gen_rtx_REG (arg.mode,
7215 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7216 else
7217 {
7218 pcum->can_split = false;
7219 return NULL_RTX;
7220 }
7221 }
7222
7223 /* Put doubleword aligned quantities in even register pairs. */
7224 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7225 {
7226 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7227 if (res < 0 && warn_psabi)
7228 inform (input_location, "parameter passing for argument of type "
7229 "%qT changed in GCC 7.1", arg.type);
7230 else if (res > 0)
7231 {
7232 pcum->nregs++;
7233 if (res > 1 && warn_psabi)
7234 inform (input_location, "parameter passing for argument of type "
7235 "%qT changed in GCC 9.1", arg.type);
7236 }
7237 }
7238
7239 /* Only allow splitting an arg between regs and memory if all preceding
7240 args were allocated to regs. For args passed by reference we only count
7241 the reference pointer. */
7242 if (pcum->can_split)
7243 nregs = 1;
7244 else
7245 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7246
7247 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7248 return NULL_RTX;
7249
7250 return gen_rtx_REG (arg.mode, pcum->nregs);
7251 }
7252
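/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Return the alignment, in
   bits, required for an argument of MODE and TYPE: DOUBLEWORD_ALIGNMENT
   when double-word alignment is needed, PARM_BOUNDARY otherwise, with
   -Wpsabi notes where the rules changed in GCC 7.1 and 9.1.  */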
7253 static unsigned int
7254 arm_function_arg_boundary (machine_mode mode, const_tree type)
7255 {
7256 if (!ARM_DOUBLEWORD_ALIGN)
7257 return PARM_BOUNDARY;
7258
7259 int res = arm_needs_doubleword_align (mode, type);
7260 if (res < 0 && warn_psabi)
7261 inform (input_location, "parameter passing for argument of type %qT "
7262 "changed in GCC 7.1", type);
7263 if (res > 1 && warn_psabi)
7264 inform (input_location, "parameter passing for argument of type "
7265 "%qT changed in GCC 9.1", type);
7266
7267 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7268 }
7269
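/* Implement TARGET_ARG_PARTIAL_BYTES.  Return the number of bytes of
   ARG that are passed in registers when the remainder of the argument
   is passed on the stack.  */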
7270 static int
7271 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7272 {
7273 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7274 int nregs = pcum->nregs;
7275
7276 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7277 {
7278 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7279 return pcum->aapcs_partial;
7280 }
7281
7282 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7283 return 0;
7284
7285 if (NUM_ARG_REGS > nregs
7286 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7287 && pcum->can_split)
7288 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7289
7290 return 0;
7291 }
7292
7293 /* Update the data in PCUM to advance over argument ARG. */
7294
7295 static void
7296 arm_function_arg_advance (cumulative_args_t pcum_v,
7297 const function_arg_info &arg)
7298 {
7299 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7300
7301 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7302 {
7303 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7304
7305 if (pcum->aapcs_cprc_slot >= 0)
7306 {
7307 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7308 arg.type);
7309 pcum->aapcs_cprc_slot = -1;
7310 }
7311
7312 /* Generic stuff. */
7313 pcum->aapcs_arg_processed = false;
7314 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7315 pcum->aapcs_reg = NULL_RTX;
7316 pcum->aapcs_partial = 0;
7317 }
7318 else
7319 {
7320 pcum->nargs += 1;
7321 if (arm_vector_mode_supported_p (arg.mode)
7322 && pcum->named_count > pcum->nargs
7323 && TARGET_IWMMXT_ABI)
7324 pcum->iwmmxt_nregs += 1;
7325 else
7326 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7327 }
7328 }
7329
7330 /* Variable sized types are passed by reference. This is a GCC
7331 extension to the ARM ABI. */
7332
7333 static bool
7334 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7335 {
7336 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7337 }
7338 \f
7339 /* Encode the current state of the #pragma [no_]long_calls. */
7340 typedef enum
7341 {
7342 OFF, /* No #pragma [no_]long_calls is in effect. */
7343 LONG, /* #pragma long_calls is in effect. */
7344 SHORT /* #pragma no_long_calls is in effect. */
7345 } arm_pragma_enum;
7346
7347 static arm_pragma_enum arm_pragma_long_calls = OFF;
7348
7349 void
7350 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7351 {
7352 arm_pragma_long_calls = LONG;
7353 }
7354
7355 void
7356 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7357 {
7358 arm_pragma_long_calls = SHORT;
7359 }
7360
7361 void
7362 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7363 {
7364 arm_pragma_long_calls = OFF;
7365 }
7366 \f
7367 /* Handle an attribute requiring a FUNCTION_DECL;
7368 arguments as in struct attribute_spec.handler. */
7369 static tree
7370 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7371 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7372 {
7373 if (TREE_CODE (*node) != FUNCTION_DECL)
7374 {
7375 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7376 name);
7377 *no_add_attrs = true;
7378 }
7379
7380 return NULL_TREE;
7381 }
7382
7383 /* Handle an "interrupt" or "isr" attribute;
7384 arguments as in struct attribute_spec.handler. */
7385 static tree
7386 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7387 bool *no_add_attrs)
7388 {
7389 if (DECL_P (*node))
7390 {
7391 if (TREE_CODE (*node) != FUNCTION_DECL)
7392 {
7393 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7394 name);
7395 *no_add_attrs = true;
7396 }
7397 else if (TARGET_VFP_BASE)
7398 {
7399 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7400 name);
7401 }
7402 /* FIXME: the argument if any is checked for type attributes;
7403 should it be checked for decl ones? */
7404 }
7405 else
7406 {
7407 if (TREE_CODE (*node) == FUNCTION_TYPE
7408 || TREE_CODE (*node) == METHOD_TYPE)
7409 {
7410 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7411 {
7412 warning (OPT_Wattributes, "%qE attribute ignored",
7413 name);
7414 *no_add_attrs = true;
7415 }
7416 }
7417 else if (TREE_CODE (*node) == POINTER_TYPE
7418 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7419 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7420 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7421 {
7422 *node = build_variant_type_copy (*node);
7423 TREE_TYPE (*node) = build_type_attribute_variant
7424 (TREE_TYPE (*node),
7425 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7426 *no_add_attrs = true;
7427 }
7428 else
7429 {
7430 /* Possibly pass this attribute on from the type to a decl. */
7431 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7432 | (int) ATTR_FLAG_FUNCTION_NEXT
7433 | (int) ATTR_FLAG_ARRAY_NEXT))
7434 {
7435 *no_add_attrs = true;
7436 return tree_cons (name, args, NULL_TREE);
7437 }
7438 else
7439 {
7440 warning (OPT_Wattributes, "%qE attribute ignored",
7441 name);
7442 }
7443 }
7444 }
7445
7446 return NULL_TREE;
7447 }
7448
7449 /* Handle a "pcs" attribute; arguments as in struct
7450 attribute_spec.handler. */
7451 static tree
7452 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7453 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7454 {
7455 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7456 {
7457 warning (OPT_Wattributes, "%qE attribute ignored", name);
7458 *no_add_attrs = true;
7459 }
7460 return NULL_TREE;
7461 }
7462
7463 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7464 /* Handle the "notshared" attribute. This attribute is another way of
7465 requesting hidden visibility. ARM's compiler supports
7466 "__declspec(notshared)"; we support the same thing via an
7467 attribute. */
7468
7469 static tree
7470 arm_handle_notshared_attribute (tree *node,
7471 tree name ATTRIBUTE_UNUSED,
7472 tree args ATTRIBUTE_UNUSED,
7473 int flags ATTRIBUTE_UNUSED,
7474 bool *no_add_attrs)
7475 {
7476 tree decl = TYPE_NAME (*node);
7477
7478 if (decl)
7479 {
7480 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7481 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7482 *no_add_attrs = false;
7483 }
7484 return NULL_TREE;
7485 }
7486 #endif
7487
7488 /* This function returns true if a function with declaration FNDECL and type
7489 FNTYPE uses the stack to pass arguments or return variables and false
7490 otherwise. This is used for functions with the attributes
7491 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7492 diagnostic messages if the stack is used. NAME is the name of the attribute
7493 used. */
7494
7495 static bool
7496 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7497 {
7498 function_args_iterator args_iter;
7499 CUMULATIVE_ARGS args_so_far_v;
7500 cumulative_args_t args_so_far;
7501 bool first_param = true;
7502 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7503
7504 /* Error out if any argument is passed on the stack. */
7505 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7506 args_so_far = pack_cumulative_args (&args_so_far_v);
7507 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7508 {
7509 rtx arg_rtx;
7510
7511 prev_arg_type = arg_type;
7512 if (VOID_TYPE_P (arg_type))
7513 continue;
7514
7515 function_arg_info arg (arg_type, /*named=*/true);
7516 if (!first_param)
7517 /* ??? We should advance after processing the argument and pass
7518 the argument we're advancing past. */
7519 arm_function_arg_advance (args_so_far, arg);
7520 arg_rtx = arm_function_arg (args_so_far, arg);
7521 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7522 {
7523 error ("%qE attribute not available to functions with arguments "
7524 "passed on the stack", name);
7525 return true;
7526 }
7527 first_param = false;
7528 }
7529
7530 /* Error out for variadic functions since we cannot control how many
7531 arguments will be passed and thus the stack could be used. stdarg_p () is
7532 not used for this check, to avoid walking the argument list twice. */
7533 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7534 {
7535 error ("%qE attribute not available to functions with variable number "
7536 "of arguments", name);
7537 return true;
7538 }
7539
7540 /* Error out if return value is passed on the stack. */
7541 ret_type = TREE_TYPE (fntype);
7542 if (arm_return_in_memory (ret_type, fntype))
7543 {
7544 error ("%qE attribute not available to functions that return value on "
7545 "the stack", name);
7546 return true;
7547 }
7548 return false;
7549 }
7550
7551 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7552 function will check whether the attribute is allowed here and will add the
7553 attribute to the function declaration tree or otherwise issue a warning. */
7554
7555 static tree
7556 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7557 tree /* args */,
7558 int /* flags */,
7559 bool *no_add_attrs)
7560 {
7561 tree fndecl;
7562
7563 if (!use_cmse)
7564 {
7565 *no_add_attrs = true;
7566 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7567 "option", name);
7568 return NULL_TREE;
7569 }
7570
7571 /* Ignore attribute for function types. */
7572 if (TREE_CODE (*node) != FUNCTION_DECL)
7573 {
7574 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7575 name);
7576 *no_add_attrs = true;
7577 return NULL_TREE;
7578 }
7579
7580 fndecl = *node;
7581
7582 /* Warn for static linkage functions. */
7583 if (!TREE_PUBLIC (fndecl))
7584 {
7585 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7586 "with static linkage", name);
7587 *no_add_attrs = true;
7588 return NULL_TREE;
7589 }
7590
7591 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7592 TREE_TYPE (fndecl));
7593 return NULL_TREE;
7594 }
7595
7596
7597 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7598 function will check whether the attribute is allowed here and will add the
7599 attribute to the function type tree or otherwise issue a diagnostic. The
7600 reason we check this at declaration time is to only allow the use of the
7601 attribute with declarations of function pointers and not function
7602 declarations. This function checks NODE is of the expected type and issues
7603 diagnostics otherwise using NAME. If it is not of the expected type
7604 *NO_ADD_ATTRS will be set to true. */
7605
7606 static tree
7607 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7608 tree /* args */,
7609 int /* flags */,
7610 bool *no_add_attrs)
7611 {
7612 tree decl = NULL_TREE, fntype = NULL_TREE;
7613 tree type;
7614
7615 if (!use_cmse)
7616 {
7617 *no_add_attrs = true;
7618 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7619 "option", name);
7620 return NULL_TREE;
7621 }
7622
7623 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7624 {
7625 decl = *node;
7626 fntype = TREE_TYPE (decl);
7627 }
7628
7629 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7630 fntype = TREE_TYPE (fntype);
7631
7632 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7633 {
7634 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7635 "function pointer", name);
7636 *no_add_attrs = true;
7637 return NULL_TREE;
7638 }
7639
7640 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7641
7642 if (*no_add_attrs)
7643 return NULL_TREE;
7644
7645 /* Prevent trees being shared among function types with and without
7646 cmse_nonsecure_call attribute. */
7647 type = TREE_TYPE (decl);
7648
7649 type = build_distinct_type_copy (type);
7650 TREE_TYPE (decl) = type;
7651 fntype = type;
7652
7653 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7654 {
7655 type = fntype;
7656 fntype = TREE_TYPE (fntype);
7657 fntype = build_distinct_type_copy (fntype);
7658 TREE_TYPE (type) = fntype;
7659 }
7660
7661 /* Construct a type attribute and add it to the function type. */
7662 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7663 TYPE_ATTRIBUTES (fntype));
7664 TYPE_ATTRIBUTES (fntype) = attrs;
7665 return NULL_TREE;
7666 }
7667
7668 /* Return 0 if the attributes for two types are incompatible, 1 if they
7669 are compatible, and 2 if they are nearly compatible (which causes a
7670 warning to be generated). */
7671 static int
7672 arm_comp_type_attributes (const_tree type1, const_tree type2)
7673 {
7674 int l1, l2, s1, s2;
7675
7676 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7677 TYPE_ATTRIBUTES (type1));
7678 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7679 TYPE_ATTRIBUTES (type2));
7680 if (bool (attrs1) != bool (attrs2))
7681 return 0;
7682 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7683 return 0;
7684
7685 /* Check for mismatch of non-default calling convention. */
7686 if (TREE_CODE (type1) != FUNCTION_TYPE)
7687 return 1;
7688
7689 /* Check for mismatched call attributes. */
7690 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7691 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7692 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7693 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7694
7695 /* Only bother to check if an attribute is defined. */
7696 if (l1 | l2 | s1 | s2)
7697 {
7698 /* If one type has an attribute, the other must have the same attribute. */
7699 if ((l1 != l2) || (s1 != s2))
7700 return 0;
7701
7702 /* Disallow mixed attributes. */
7703 if ((l1 & s2) || (l2 & s1))
7704 return 0;
7705 }
7706
7707 /* Check for mismatched ISR attribute. */
7708 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7709 if (! l1)
7710 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7711 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7712 if (! l2)
7713 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7714 if (l1 != l2)
7715 return 0;
7716
7717 l1 = lookup_attribute ("cmse_nonsecure_call",
7718 TYPE_ATTRIBUTES (type1)) != NULL;
7719 l2 = lookup_attribute ("cmse_nonsecure_call",
7720 TYPE_ATTRIBUTES (type2)) != NULL;
7721
7722 if (l1 != l2)
7723 return 0;
7724
7725 return 1;
7726 }
7727
7728 /* Assigns default attributes to newly defined type. This is used to
7729 set short_call/long_call attributes for function types of
7730 functions defined inside corresponding #pragma scopes. */
7731 static void
7732 arm_set_default_type_attributes (tree type)
7733 {
7734 /* Add __attribute__ ((long_call)) to all functions, when
7735 inside #pragma long_calls or __attribute__ ((short_call)),
7736 when inside #pragma no_long_calls. */
7737 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7738 {
7739 tree type_attr_list, attr_name;
7740 type_attr_list = TYPE_ATTRIBUTES (type);
7741
7742 if (arm_pragma_long_calls == LONG)
7743 attr_name = get_identifier ("long_call");
7744 else if (arm_pragma_long_calls == SHORT)
7745 attr_name = get_identifier ("short_call");
7746 else
7747 return;
7748
7749 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7750 TYPE_ATTRIBUTES (type) = type_attr_list;
7751 }
7752 }
7753 \f
7754 /* Return true if DECL is known to be linked into section SECTION. */
7755
7756 static bool
7757 arm_function_in_section_p (tree decl, section *section)
7758 {
7759 /* We can only be certain about the prevailing symbol definition. */
7760 if (!decl_binds_to_current_def_p (decl))
7761 return false;
7762
7763 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7764 if (!DECL_SECTION_NAME (decl))
7765 {
7766 /* Make sure that we will not create a unique section for DECL. */
7767 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7768 return false;
7769 }
7770
7771 return function_section (decl) == section;
7772 }
7773
7774 /* Return nonzero if a 32-bit "long_call" should be generated for
7775 a call from the current function to DECL. We generate a long_call
7776 if the function:
7777
7778 a. has an __attribute__ ((long_call))
7779 or b. is within the scope of a #pragma long_calls
7780 or c. the -mlong-calls command line switch has been specified
7781
7782 However we do not generate a long call if the function:
7783
7784 d. has an __attribute__ ((short_call))
7785 or e. is inside the scope of a #pragma no_long_calls
7786 or f. is defined in the same section as the current function. */
7787
7788 bool
7789 arm_is_long_call_p (tree decl)
7790 {
7791 tree attrs;
7792
7793 if (!decl)
7794 return TARGET_LONG_CALLS;
7795
7796 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7797 if (lookup_attribute ("short_call", attrs))
7798 return false;
7799
7800 /* For "f", be conservative, and only cater for cases in which the
7801 whole of the current function is placed in the same section. */
7802 if (!flag_reorder_blocks_and_partition
7803 && TREE_CODE (decl) == FUNCTION_DECL
7804 && arm_function_in_section_p (decl, current_function_section ()))
7805 return false;
7806
7807 if (lookup_attribute ("long_call", attrs))
7808 return true;
7809
7810 return TARGET_LONG_CALLS;
7811 }
7812
7813 /* Return nonzero if it is ok to make a tail-call to DECL. */
7814 static bool
7815 arm_function_ok_for_sibcall (tree decl, tree exp)
7816 {
7817 unsigned long func_type;
7818
7819 if (cfun->machine->sibcall_blocked)
7820 return false;
7821
7822 if (TARGET_FDPIC)
7823 {
7824 /* In FDPIC, never tailcall something for which we have no decl:
7825 the target function could be in a different module, requiring
7826 a different FDPIC register value. */
7827 if (decl == NULL)
7828 return false;
7829 }
7830
7831 /* Never tailcall something if we are generating code for Thumb-1. */
7832 if (TARGET_THUMB1)
7833 return false;
7834
7835 /* The PIC register is live on entry to VxWorks PLT entries, so we
7836 must make the call before restoring the PIC register. */
7837 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7838 return false;
7839
7840 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7841 may be used both as target of the call and base register for restoring
7842 the VFP registers */
7843 if (TARGET_APCS_FRAME && TARGET_ARM
7844 && TARGET_HARD_FLOAT
7845 && decl && arm_is_long_call_p (decl))
7846 return false;
7847
7848 /* If we are interworking and the function is not declared static
7849 then we can't tail-call it unless we know that it exists in this
7850 compilation unit (since it might be a Thumb routine). */
7851 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7852 && !TREE_ASM_WRITTEN (decl))
7853 return false;
7854
7855 func_type = arm_current_func_type ();
7856 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7857 if (IS_INTERRUPT (func_type))
7858 return false;
7859
7860 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7861 generated for entry functions themselves. */
7862 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7863 return false;
7864
7865 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7866 this would complicate matters for later code generation. */
7867 if (TREE_CODE (exp) == CALL_EXPR)
7868 {
7869 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7870 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7871 return false;
7872 }
7873
7874 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7875 {
7876 /* Check that the return value locations are the same. For
7877 example that we aren't returning a value from the sibling in
7878 a VFP register but then need to transfer it to a core
7879 register. */
7880 rtx a, b;
7881 tree decl_or_type = decl;
7882
7883 /* If it is an indirect function pointer, get the function type. */
7884 if (!decl)
7885 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7886
7887 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7888 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7889 cfun->decl, false);
7890 if (!rtx_equal_p (a, b))
7891 return false;
7892 }
7893
7894 /* Never tailcall if function may be called with a misaligned SP. */
7895 if (IS_STACKALIGN (func_type))
7896 return false;
7897
7898 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7899 references should become a NOP. Don't convert such calls into
7900 sibling calls. */
7901 if (TARGET_AAPCS_BASED
7902 && arm_abi == ARM_ABI_AAPCS
7903 && decl
7904 && DECL_WEAK (decl))
7905 return false;
7906
7907 /* We cannot do a tailcall for an indirect call by descriptor if all the
7908 argument registers are used because the only register left to load the
7909 address is IP and it will already contain the static chain. */
7910 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7911 {
7912 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7913 CUMULATIVE_ARGS cum;
7914 cumulative_args_t cum_v;
7915
7916 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7917 cum_v = pack_cumulative_args (&cum);
7918
7919 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7920 {
7921 tree type = TREE_VALUE (t);
7922 if (!VOID_TYPE_P (type))
7923 {
7924 function_arg_info arg (type, /*named=*/true);
7925 arm_function_arg_advance (cum_v, arg);
7926 }
7927 }
7928
7929 function_arg_info arg (integer_type_node, /*named=*/true);
7930 if (!arm_function_arg (cum_v, arg))
7931 return false;
7932 }
7933
7934 /* Everything else is ok. */
7935 return true;
7936 }
7937
7938 \f
7939 /* Addressing mode support functions. */
7940
7941 /* Return nonzero if X is a legitimate immediate operand when compiling
7942 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7943 int
7944 legitimate_pic_operand_p (rtx x)
7945 {
7946 if (SYMBOL_REF_P (x)
7947 || (GET_CODE (x) == CONST
7948 && GET_CODE (XEXP (x, 0)) == PLUS
7949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7950 return 0;
7951
7952 return 1;
7953 }
7954
7955 /* Record that the current function needs a PIC register. If PIC_REG is null,
7956 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7957 both cases cfun->machine->pic_reg is initialized if we have not already done
7958 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7959 the PIC register is reloaded at the current position in the instruction stream
7960 regardless of whether it was loaded before. Otherwise, it is only loaded
7961 if not already done so (crtl->uses_pic_offset_table is null). Note that
7962 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7963 is only supported iff COMPUTE_NOW is false. */
7964
7965 static void
7966 require_pic_register (rtx pic_reg, bool compute_now)
7967 {
7968 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7969
7970 /* A lot of the logic here is made obscure by the fact that this
7971 routine gets called as part of the rtx cost estimation process.
7972 We don't want those calls to affect any assumptions about the real
7973 function; and further, we can't call entry_of_function() until we
7974 start the real expansion process. */
7975 if (!crtl->uses_pic_offset_table || compute_now)
7976 {
7977 gcc_assert (can_create_pseudo_p ()
7978 || (pic_reg != NULL_RTX
7979 && REG_P (pic_reg)
7980 && GET_MODE (pic_reg) == Pmode));
7981 if (arm_pic_register != INVALID_REGNUM
7982 && !compute_now
7983 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7984 {
7985 if (!cfun->machine->pic_reg)
7986 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7987
7988 /* Play games to avoid marking the function as needing pic
7989 if we are being called as part of the cost-estimation
7990 process. */
7991 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7992 crtl->uses_pic_offset_table = 1;
7993 }
7994 else
7995 {
7996 rtx_insn *seq, *insn;
7997
7998 if (pic_reg == NULL_RTX)
7999 pic_reg = gen_reg_rtx (Pmode);
8000 if (!cfun->machine->pic_reg)
8001 cfun->machine->pic_reg = pic_reg;
8002
8003 /* Play games to avoid marking the function as needing pic
8004 if we are being called as part of the cost-estimation
8005 process. */
8006 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8007 {
8008 crtl->uses_pic_offset_table = 1;
8009 start_sequence ();
8010
8011 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8012 && arm_pic_register > LAST_LO_REGNUM
8013 && !compute_now)
8014 emit_move_insn (cfun->machine->pic_reg,
8015 gen_rtx_REG (Pmode, arm_pic_register));
8016 else
8017 arm_load_pic_register (0UL, pic_reg);
8018
8019 seq = get_insns ();
8020 end_sequence ();
8021
8022 for (insn = seq; insn; insn = NEXT_INSN (insn))
8023 if (INSN_P (insn))
8024 INSN_LOCATION (insn) = prologue_location;
8025
8026 /* We can be called during expansion of PHI nodes, where
8027 we can't yet emit instructions directly in the final
8028 insn stream. Queue the insns on the entry edge, they will
8029 be committed after everything else is expanded. */
8030 if (currently_expanding_to_rtl)
8031 insert_insn_on_edge (seq,
8032 single_succ_edge
8033 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8034 else
8035 emit_insn (seq);
8036 }
8037 }
8038 }
8039 }
8040
8041 /* Generate insns to calculate the address of ORIG in pic mode. */
8042 static rtx_insn *
8043 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8044 {
8045 rtx pat;
8046 rtx mem;
8047
8048 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8049
8050 /* Make the MEM as close to a constant as possible. */
8051 mem = SET_SRC (pat);
8052 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8053 MEM_READONLY_P (mem) = 1;
8054 MEM_NOTRAP_P (mem) = 1;
8055
8056 return emit_insn (pat);
8057 }
8058
8059 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8060 created to hold the result of the load. If not NULL, PIC_REG indicates
8061 which register to use as PIC register, otherwise it is decided by register
8062 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8063 location in the instruction stream, regardless of whether it was loaded
8064 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8065 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8066
8067 Returns the register REG into which the PIC load is performed. */
8068
8069 rtx
8070 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8071 bool compute_now)
8072 {
8073 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8074
8075 if (SYMBOL_REF_P (orig)
8076 || LABEL_REF_P (orig))
8077 {
8078 if (reg == 0)
8079 {
8080 gcc_assert (can_create_pseudo_p ());
8081 reg = gen_reg_rtx (Pmode);
8082 }
8083
8084 /* VxWorks does not impose a fixed gap between segments; the run-time
8085 gap can be different from the object-file gap. We therefore can't
8086 use GOTOFF unless we are absolutely sure that the symbol is in the
8087 same segment as the GOT. Unfortunately, the flexibility of linker
8088 scripts means that we can't be sure of that in general, so assume
8089 that GOTOFF is never valid on VxWorks. */
8090 /* References to weak symbols cannot be resolved locally: they
8091 may be overridden by a non-weak definition at link time. */
8092 rtx_insn *insn;
8093 if ((LABEL_REF_P (orig)
8094 || (SYMBOL_REF_P (orig)
8095 && SYMBOL_REF_LOCAL_P (orig)
8096 && (SYMBOL_REF_DECL (orig)
8097 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8098 && (!SYMBOL_REF_FUNCTION_P (orig)
8099 || arm_fdpic_local_funcdesc_p (orig))))
8100 && NEED_GOT_RELOC
8101 && arm_pic_data_is_text_relative)
8102 insn = arm_pic_static_addr (orig, reg);
8103 else
8104 {
8105 /* If this function doesn't have a pic register, create one now. */
8106 require_pic_register (pic_reg, compute_now);
8107
8108 if (pic_reg == NULL_RTX)
8109 pic_reg = cfun->machine->pic_reg;
8110
8111 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8112 }
8113
8114 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8115 by loop. */
8116 set_unique_reg_note (insn, REG_EQUAL, orig);
8117
8118 return reg;
8119 }
8120 else if (GET_CODE (orig) == CONST)
8121 {
8122 rtx base, offset;
8123
8124 if (GET_CODE (XEXP (orig, 0)) == PLUS
8125 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8126 return orig;
8127
8128 /* Handle the case where we have: const (UNSPEC_TLS). */
8129 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8130 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8131 return orig;
8132
8133 /* Handle the case where we have:
8134 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8135 CONST_INT. */
8136 if (GET_CODE (XEXP (orig, 0)) == PLUS
8137 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8138 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8139 {
8140 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8141 return orig;
8142 }
8143
8144 if (reg == 0)
8145 {
8146 gcc_assert (can_create_pseudo_p ());
8147 reg = gen_reg_rtx (Pmode);
8148 }
8149
8150 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8151
8152 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8153 pic_reg, compute_now);
8154 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8155 base == reg ? 0 : reg, pic_reg,
8156 compute_now);
8157
8158 if (CONST_INT_P (offset))
8159 {
8160 /* The base register doesn't really matter, we only want to
8161 test the index for the appropriate mode. */
8162 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8163 {
8164 gcc_assert (can_create_pseudo_p ());
8165 offset = force_reg (Pmode, offset);
8166 }
8167
8168 if (CONST_INT_P (offset))
8169 return plus_constant (Pmode, base, INTVAL (offset));
8170 }
8171
8172 if (GET_MODE_SIZE (mode) > 4
8173 && (GET_MODE_CLASS (mode) == MODE_INT
8174 || TARGET_SOFT_FLOAT))
8175 {
8176 emit_insn (gen_addsi3 (reg, base, offset));
8177 return reg;
8178 }
8179
8180 return gen_rtx_PLUS (Pmode, base, offset);
8181 }
8182
8183 return orig;
8184 }
8185
8186
8187 /* Generate insns that produce the address of the stack canary. */
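/* The canary lives at a fixed offset from the thread pointer: load TP
   (using the reload variant when requested) and return a MEM at
   TP + arm_stack_protector_guard_offset.  */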
8188 rtx
8189 arm_stack_protect_tls_canary_mem (bool reload)
8190 {
8191 rtx tp = gen_reg_rtx (SImode);
8192 if (reload)
8193 emit_insn (gen_reload_tp_hard (tp));
8194 else
8195 emit_insn (gen_load_tp_hard (tp));
8196
8197 rtx reg = gen_reg_rtx (SImode);
8198 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8199 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8200 return gen_rtx_MEM (SImode, reg);
8201 }
8202
8203
8204 /* Whether a register is callee saved or not. This is necessary because high
8205 registers are marked as caller saved when optimizing for size on Thumb-1
8206 targets despite being callee saved in order to avoid using them. */
8207 #define callee_saved_reg_p(reg) \
8208 (!call_used_or_fixed_reg_p (reg) \
8209 || (TARGET_THUMB1 && optimize_size \
8210 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8211
8212 /* Return a mask for the call-clobbered low registers that are unused
8213 at the end of the prologue. */
8214 static unsigned long
8215 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8216 {
8217 unsigned long mask = 0;
8218 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8219
8220 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8221 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8222 mask |= 1 << (reg - FIRST_LO_REGNUM);
8223 return mask;
8224 }
8225
8226 /* Similarly for the start of the epilogue. */
8227 static unsigned long
8228 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8229 {
8230 unsigned long mask = 0;
8231 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8232
8233 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8234 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8235 mask |= 1 << (reg - FIRST_LO_REGNUM);
8236 return mask;
8237 }
8238
8239 /* Find a spare register to use during the prolog of a function. */
8240
8241 static int
8242 thumb_find_work_register (unsigned long pushed_regs_mask)
8243 {
8244 int reg;
8245
8246 unsigned long unused_regs
8247 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8248
8249 /* Check the argument registers first as these are call-used. The
8250 register allocation order means that sometimes r3 might be used
8251 but earlier argument registers might not, so check them all. */
8252 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8253 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8254 return reg;
8255
8256 /* Otherwise look for a call-saved register that is going to be pushed. */
8257 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8258 if (pushed_regs_mask & (1 << reg))
8259 return reg;
8260
8261 if (TARGET_THUMB2)
8262 {
8263 /* Thumb-2 can use high regs. */
8264 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8265 if (pushed_regs_mask & (1 << reg))
8266 return reg;
8267 }
8268 /* Something went wrong - thumb_compute_save_reg_mask()
8269 should have arranged for a suitable register to be pushed. */
8270 gcc_unreachable ();
8271 }
8272
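/* Counter used to generate unique local labels for PIC address
   computations (see arm_load_pic_register and arm_pic_static_addr).  */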
8273 static GTY(()) int pic_labelno;
8274
8275 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8276 low register. */
8277
8278 void
8279 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8280 {
8281 rtx l1, labelno, pic_tmp, pic_rtx;
8282
8283 if (crtl->uses_pic_offset_table == 0
8284 || TARGET_SINGLE_PIC_BASE
8285 || TARGET_FDPIC)
8286 return;
8287
8288 gcc_assert (flag_pic);
8289
8290 if (pic_reg == NULL_RTX)
8291 pic_reg = cfun->machine->pic_reg;
8292 if (TARGET_VXWORKS_RTP)
8293 {
8294 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8295 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8296 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8297
8298 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8299
8300 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8301 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8302 }
8303 else
8304 {
8305 /* We use an UNSPEC rather than a LABEL_REF because this label
8306 never appears in the code stream. */
8307
8308 labelno = GEN_INT (pic_labelno++);
8309 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8310 l1 = gen_rtx_CONST (VOIDmode, l1);
8311
8312 /* On the ARM the PC register contains 'dot + 8' at the time of the
8313 addition, on the Thumb it is 'dot + 4'. */
8314 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8315 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8316 UNSPEC_GOTSYM_OFF);
8317 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8318
8319 if (TARGET_32BIT)
8320 {
8321 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8322 }
8323 else /* TARGET_THUMB1 */
8324 {
8325 if (arm_pic_register != INVALID_REGNUM
8326 && REGNO (pic_reg) > LAST_LO_REGNUM)
8327 {
8328 /* We will have pushed the pic register, so we should always be
8329 able to find a work register. */
8330 pic_tmp = gen_rtx_REG (SImode,
8331 thumb_find_work_register (saved_regs));
8332 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8333 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8334 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8335 }
8336 else if (arm_pic_register != INVALID_REGNUM
8337 && arm_pic_register > LAST_LO_REGNUM
8338 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8339 {
8340 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8341 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8342 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8343 }
8344 else
8345 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8346 }
8347 }
8348
8349 /* Need to emit this whether or not we obey regdecls,
8350 since setjmp/longjmp can cause life info to screw up. */
8351 emit_use (pic_reg);
8352 }
8353
8354 /* Try to determine whether an object, referenced via ORIG, will be
8355 placed in the text or data segment. This is used in FDPIC mode, to
8356 decide which relocations to use when accessing ORIG. *IS_READONLY
8357 is set to true if ORIG is a read-only location, false otherwise.
8358 Return true if we could determine the location of ORIG, false
8359 otherwise. *IS_READONLY is valid only when we return true. */
8360 static bool
8361 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8362 {
8363 *is_readonly = false;
8364
8365 if (LABEL_REF_P (orig))
8366 {
8367 *is_readonly = true;
8368 return true;
8369 }
8370
8371 if (SYMBOL_REF_P (orig))
8372 {
8373 if (CONSTANT_POOL_ADDRESS_P (orig))
8374 {
8375 *is_readonly = true;
8376 return true;
8377 }
8378 if (SYMBOL_REF_LOCAL_P (orig)
8379 && !SYMBOL_REF_EXTERNAL_P (orig)
8380 && SYMBOL_REF_DECL (orig)
8381 && (!DECL_P (SYMBOL_REF_DECL (orig))
8382 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8383 {
8384 tree decl = SYMBOL_REF_DECL (orig);
8385 tree init = (TREE_CODE (decl) == VAR_DECL)
8386 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8387 ? decl : 0;
8388 int reloc = 0;
8389 bool named_section, readonly;
8390
8391 if (init && init != error_mark_node)
8392 reloc = compute_reloc_for_constant (init);
8393
8394 named_section = TREE_CODE (decl) == VAR_DECL
8395 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8396 readonly = decl_readonly_section (decl, reloc);
8397
8398 /* We don't know where the link script will put a named
8399 section, so return false in such a case. */
8400 if (named_section)
8401 return false;
8402
8403 *is_readonly = readonly;
8404 return true;
8405 }
8406
8407 /* We don't know. */
8408 return false;
8409 }
8410
8411 gcc_unreachable ();
8412 }
8413
8414 /* Generate code to load the address of a static var when flag_pic is set. */
8415 static rtx_insn *
8416 arm_pic_static_addr (rtx orig, rtx reg)
8417 {
8418 rtx l1, labelno, offset_rtx;
8419 rtx_insn *insn;
8420
8421 gcc_assert (flag_pic);
8422
8423 bool is_readonly = false;
8424 bool info_known = false;
8425
8426 if (TARGET_FDPIC
8427 && SYMBOL_REF_P (orig)
8428 && !SYMBOL_REF_FUNCTION_P (orig))
8429 info_known = arm_is_segment_info_known (orig, &is_readonly);
8430
8431 if (TARGET_FDPIC
8432 && SYMBOL_REF_P (orig)
8433 && !SYMBOL_REF_FUNCTION_P (orig)
8434 && !info_known)
8435 {
8436 /* We don't know where orig is stored, so we have to be
8437 pessimistic and use a GOT relocation. */
8438 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8439
8440 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8441 }
8442 else if (TARGET_FDPIC
8443 && SYMBOL_REF_P (orig)
8444 && (SYMBOL_REF_FUNCTION_P (orig)
8445 || !is_readonly))
8446 {
8447 /* We use the GOTOFF relocation. */
8448 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8449
8450 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8451 emit_insn (gen_movsi (reg, l1));
8452 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8453 }
8454 else
8455 {
8456 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8457 PC-relative access. */
8458 /* We use an UNSPEC rather than a LABEL_REF because this label
8459 never appears in the code stream. */
8460 labelno = GEN_INT (pic_labelno++);
8461 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8462 l1 = gen_rtx_CONST (VOIDmode, l1);
8463
8464 /* On the ARM the PC register contains 'dot + 8' at the time of the
8465 addition, on the Thumb it is 'dot + 4'. */
8466 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8467 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8468 UNSPEC_SYMBOL_OFFSET);
8469 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8470
8471 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8472 labelno));
8473 }
8474
8475 return insn;
8476 }
8477
8478 /* Return nonzero if X is valid as an ARM state addressing register. */
8479 static int
8480 arm_address_register_rtx_p (rtx x, int strict_p)
8481 {
8482 int regno;
8483
8484 if (!REG_P (x))
8485 return 0;
8486
8487 regno = REGNO (x);
8488
8489 if (strict_p)
8490 return ARM_REGNO_OK_FOR_BASE_P (regno);
8491
8492 return (regno <= LAST_ARM_REGNUM
8493 || regno >= FIRST_PSEUDO_REGISTER
8494 || regno == FRAME_POINTER_REGNUM
8495 || regno == ARG_POINTER_REGNUM);
8496 }
8497
8498 /* Return TRUE if this rtx is the difference of a symbol and a label,
8499 and will reduce to a PC-relative relocation in the object file.
8500 Expressions like this can be left alone when generating PIC, rather
8501 than forced through the GOT. */
8502 static int
8503 pcrel_constant_p (rtx x)
8504 {
8505 if (GET_CODE (x) == MINUS)
8506 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8507
8508 return FALSE;
8509 }
8510
8511 /* Return true if X will surely end up in an index register after next
8512 splitting pass. */
8513 static bool
8514 will_be_in_index_register (const_rtx x)
8515 {
8516 /* arm.md: calculate_pic_address will split this into a register. */
8517 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8518 }
8519
8520 /* Return nonzero if X is a valid ARM state address operand. */
8521 int
8522 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8523 int strict_p)
8524 {
8525 bool use_ldrd;
8526 enum rtx_code code = GET_CODE (x);
8527
8528 if (arm_address_register_rtx_p (x, strict_p))
8529 return 1;
8530
8531 use_ldrd = (TARGET_LDRD
8532 && (mode == DImode || mode == DFmode));
8533
8534 if (code == POST_INC || code == PRE_DEC
8535 || ((code == PRE_INC || code == POST_DEC)
8536 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8537 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8538
8539 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8540 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8541 && GET_CODE (XEXP (x, 1)) == PLUS
8542 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8543 {
8544 rtx addend = XEXP (XEXP (x, 1), 1);
8545
8546 /* Don't allow ldrd post-increment by register because it's hard
8547 to fix up invalid register choices. */
8548 if (use_ldrd
8549 && GET_CODE (x) == POST_MODIFY
8550 && REG_P (addend))
8551 return 0;
8552
8553 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8554 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8555 }
8556
8557 /* After reload constants split into minipools will have addresses
8558 from a LABEL_REF. */
8559 else if (reload_completed
8560 && (code == LABEL_REF
8561 || (code == CONST
8562 && GET_CODE (XEXP (x, 0)) == PLUS
8563 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8564 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8565 return 1;
8566
8567 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8568 return 0;
8569
8570 else if (code == PLUS)
8571 {
8572 rtx xop0 = XEXP (x, 0);
8573 rtx xop1 = XEXP (x, 1);
8574
8575 return ((arm_address_register_rtx_p (xop0, strict_p)
8576 && ((CONST_INT_P (xop1)
8577 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8578 || (!strict_p && will_be_in_index_register (xop1))))
8579 || (arm_address_register_rtx_p (xop1, strict_p)
8580 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8581 }
8582
8583 #if 0
8584 /* Reload currently can't handle MINUS, so disable this for now */
8585 else if (GET_CODE (x) == MINUS)
8586 {
8587 rtx xop0 = XEXP (x, 0);
8588 rtx xop1 = XEXP (x, 1);
8589
8590 return (arm_address_register_rtx_p (xop0, strict_p)
8591 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8592 }
8593 #endif
8594
8595 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8596 && code == SYMBOL_REF
8597 && CONSTANT_POOL_ADDRESS_P (x)
8598 && ! (flag_pic
8599 && symbol_mentioned_p (get_pool_constant (x))
8600 && ! pcrel_constant_p (get_pool_constant (x))))
8601 return 1;
8602
8603 return 0;
8604 }
8605
8606 /* Return true if we can avoid creating a constant pool entry for x. */
8607 static bool
8608 can_avoid_literal_pool_for_label_p (rtx x)
8609 {
8610 /* Normally we can assign constant values to target registers without
8611 the help of the constant pool. But there are cases where we have to
8612 use the constant pool, such as:
8613 1) assigning a label to a register;
8614 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8615
8616 A constant pool access of the form:
8617 (set (reg r0) (mem (symbol_ref (".LC0"))))
8618 will cause the use of the literal pool (later, in arm_reorg).
8619 So here we mark such a form as invalid, and the compiler will then
8620 adjust it into:
8621 (set (reg r0) (symbol_ref (".LC0")))
8622 (set (reg r0) (mem (reg r0))).
8623 No extra register is required, and (mem (reg r0)) won't cause the use
8624 of literal pools.  */
8625 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8626 && CONSTANT_POOL_ADDRESS_P (x))
8627 return 1;
8628 return 0;
8629 }
8630
8631
8632 /* Return nonzero if X is a valid Thumb-2 address operand. */
8633 static int
8634 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8635 {
8636 bool use_ldrd;
8637 enum rtx_code code = GET_CODE (x);
8638
8639 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8640 return mve_vector_mem_operand (mode, x, strict_p);
8641
8642 if (arm_address_register_rtx_p (x, strict_p))
8643 return 1;
8644
8645 use_ldrd = (TARGET_LDRD
8646 && (mode == DImode || mode == DFmode));
8647
8648 if (code == POST_INC || code == PRE_DEC
8649 || ((code == PRE_INC || code == POST_DEC)
8650 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8651 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8652
8653 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8654 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8655 && GET_CODE (XEXP (x, 1)) == PLUS
8656 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8657 {
8658 /* Thumb-2 only has autoincrement by constant. */
8659 rtx addend = XEXP (XEXP (x, 1), 1);
8660 HOST_WIDE_INT offset;
8661
8662 if (!CONST_INT_P (addend))
8663 return 0;
8664
8665 offset = INTVAL(addend);
8666 if (GET_MODE_SIZE (mode) <= 4)
8667 return (offset > -256 && offset < 256);
8668
8669 return (use_ldrd && offset > -1024 && offset < 1024
8670 && (offset & 3) == 0);
8671 }
8672
8673 /* After reload constants split into minipools will have addresses
8674 from a LABEL_REF. */
8675 else if (reload_completed
8676 && (code == LABEL_REF
8677 || (code == CONST
8678 && GET_CODE (XEXP (x, 0)) == PLUS
8679 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8680 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8681 return 1;
8682
8683 else if (mode == TImode
8684 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8685 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8686 return 0;
8687
8688 else if (code == PLUS)
8689 {
8690 rtx xop0 = XEXP (x, 0);
8691 rtx xop1 = XEXP (x, 1);
8692
8693 return ((arm_address_register_rtx_p (xop0, strict_p)
8694 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8695 || (!strict_p && will_be_in_index_register (xop1))))
8696 || (arm_address_register_rtx_p (xop1, strict_p)
8697 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8698 }
8699
8700 else if (can_avoid_literal_pool_for_label_p (x))
8701 return 0;
8702
8703 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8704 && code == SYMBOL_REF
8705 && CONSTANT_POOL_ADDRESS_P (x)
8706 && ! (flag_pic
8707 && symbol_mentioned_p (get_pool_constant (x))
8708 && ! pcrel_constant_p (get_pool_constant (x))))
8709 return 1;
8710
8711 return 0;
8712 }
8713
8714 /* Return nonzero if INDEX is valid for an address index operand in
8715 ARM state. */
8716 static int
8717 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8718 int strict_p)
8719 {
8720 HOST_WIDE_INT range;
8721 enum rtx_code code = GET_CODE (index);
8722
8723 /* Standard coprocessor addressing modes. */
8724 if (TARGET_HARD_FLOAT
8725 && (mode == SFmode || mode == DFmode))
8726 return (code == CONST_INT && INTVAL (index) < 1024
8727 && INTVAL (index) > -1024
8728 && (INTVAL (index) & 3) == 0);
8729
8730 /* For quad modes, we restrict the constant offset to be slightly less
8731 than what the instruction format permits. We do this because for
8732 quad mode moves, we will actually decompose them into two separate
8733 double-mode reads or writes. INDEX must therefore be a valid
8734 (double-mode) offset and so should INDEX+8. */
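/* Worked example of the constraint above: a quad-mode access at offset 1012
   splits into double-mode accesses at 1012 and 1020, both within the
   double-mode limit below; an offset of 1016 would put the second access at
   1024, out of range, hence the tighter bound of 1016. */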
8735 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8736 return (code == CONST_INT
8737 && INTVAL (index) < 1016
8738 && INTVAL (index) > -1024
8739 && (INTVAL (index) & 3) == 0);
8740
8741 /* We have no such constraint on double mode offsets, so we permit the
8742 full range of the instruction format. */
8743 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8744 return (code == CONST_INT
8745 && INTVAL (index) < 1024
8746 && INTVAL (index) > -1024
8747 && (INTVAL (index) & 3) == 0);
8748
8749 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8750 return (code == CONST_INT
8751 && INTVAL (index) < 1024
8752 && INTVAL (index) > -1024
8753 && (INTVAL (index) & 3) == 0);
8754
8755 if (arm_address_register_rtx_p (index, strict_p)
8756 && (GET_MODE_SIZE (mode) <= 4))
8757 return 1;
8758
8759 if (mode == DImode || mode == DFmode)
8760 {
8761 if (code == CONST_INT)
8762 {
8763 HOST_WIDE_INT val = INTVAL (index);
8764
8765 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8766 If vldr is selected it uses arm_coproc_mem_operand. */
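/* For example, the upper bound of 4092 below (rather than 4096) leaves room
   for the second access of a 2x-ldr pair at offset VAL + 4. */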
8767 if (TARGET_LDRD)
8768 return val > -256 && val < 256;
8769 else
8770 return val > -4096 && val < 4092;
8771 }
8772
8773 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8774 }
8775
8776 if (GET_MODE_SIZE (mode) <= 4
8777 && ! (arm_arch4
8778 && (mode == HImode
8779 || mode == HFmode
8780 || (mode == QImode && outer == SIGN_EXTEND))))
8781 {
8782 if (code == MULT)
8783 {
8784 rtx xiop0 = XEXP (index, 0);
8785 rtx xiop1 = XEXP (index, 1);
8786
8787 return ((arm_address_register_rtx_p (xiop0, strict_p)
8788 && power_of_two_operand (xiop1, SImode))
8789 || (arm_address_register_rtx_p (xiop1, strict_p)
8790 && power_of_two_operand (xiop0, SImode)));
8791 }
8792 else if (code == LSHIFTRT || code == ASHIFTRT
8793 || code == ASHIFT || code == ROTATERT)
8794 {
8795 rtx op = XEXP (index, 1);
8796
8797 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8798 && CONST_INT_P (op)
8799 && INTVAL (op) > 0
8800 && INTVAL (op) <= 31);
8801 }
8802 }
8803
8804 /* For ARM v4 we may be doing a sign-extend operation during the
8805 load. */
8806 if (arm_arch4)
8807 {
8808 if (mode == HImode
8809 || mode == HFmode
8810 || (outer == SIGN_EXTEND && mode == QImode))
8811 range = 256;
8812 else
8813 range = 4096;
8814 }
8815 else
8816 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8817
8818 return (code == CONST_INT
8819 && INTVAL (index) < range
8820 && INTVAL (index) > -range);
8821 }
8822
8823 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8824 index operand. i.e. 1, 2, 4 or 8. */
8825 static bool
8826 thumb2_index_mul_operand (rtx op)
8827 {
8828 HOST_WIDE_INT val;
8829
8830 if (!CONST_INT_P (op))
8831 return false;
8832
8833 val = INTVAL(op);
8834 return (val == 1 || val == 2 || val == 4 || val == 8);
8835 }
8836
8837 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8838 static int
8839 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8840 {
8841 enum rtx_code code = GET_CODE (index);
8842
8843 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8844 /* Standard coprocessor addressing modes. */
8845 if (TARGET_VFP_BASE
8846 && (mode == SFmode || mode == DFmode))
8847 return (code == CONST_INT && INTVAL (index) < 1024
8848 /* Thumb-2 allows only a > -256 index range for its core register
8849 loads/stores. Since we allow SF/DF in core registers, we have
8850 to use the intersection of -256..4096 (core) and -1024..1024
8851 (coprocessor). */
8852 && INTVAL (index) > -256
8853 && (INTVAL (index) & 3) == 0);
8854
8855 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8856 {
8857 /* For DImode assume values will usually live in core regs
8858 and only allow LDRD addressing modes. */
8859 if (!TARGET_LDRD || mode != DImode)
8860 return (code == CONST_INT
8861 && INTVAL (index) < 1024
8862 && INTVAL (index) > -1024
8863 && (INTVAL (index) & 3) == 0);
8864 }
8865
8866 /* For quad modes, we restrict the constant offset to be slightly less
8867 than what the instruction format permits. We do this because for
8868 quad mode moves, we will actually decompose them into two separate
8869 double-mode reads or writes. INDEX must therefore be a valid
8870 (double-mode) offset and so should INDEX+8. */
8871 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8872 return (code == CONST_INT
8873 && INTVAL (index) < 1016
8874 && INTVAL (index) > -1024
8875 && (INTVAL (index) & 3) == 0);
8876
8877 /* We have no such constraint on double mode offsets, so we permit the
8878 full range of the instruction format. */
8879 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8880 return (code == CONST_INT
8881 && INTVAL (index) < 1024
8882 && INTVAL (index) > -1024
8883 && (INTVAL (index) & 3) == 0);
8884
8885 if (arm_address_register_rtx_p (index, strict_p)
8886 && (GET_MODE_SIZE (mode) <= 4))
8887 return 1;
8888
8889 if (mode == DImode || mode == DFmode)
8890 {
8891 if (code == CONST_INT)
8892 {
8893 HOST_WIDE_INT val = INTVAL (index);
8894 /* Thumb-2 ldrd only has reg+const addressing modes.
8895 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8896 If vldr is selected it uses arm_coproc_mem_operand. */
8897 if (TARGET_LDRD)
8898 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8899 else
8900 return IN_RANGE (val, -255, 4095 - 4);
8901 }
8902 else
8903 return 0;
8904 }
8905
8906 if (code == MULT)
8907 {
8908 rtx xiop0 = XEXP (index, 0);
8909 rtx xiop1 = XEXP (index, 1);
8910
8911 return ((arm_address_register_rtx_p (xiop0, strict_p)
8912 && thumb2_index_mul_operand (xiop1))
8913 || (arm_address_register_rtx_p (xiop1, strict_p)
8914 && thumb2_index_mul_operand (xiop0)));
8915 }
8916 else if (code == ASHIFT)
8917 {
8918 rtx op = XEXP (index, 1);
8919
8920 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8921 && CONST_INT_P (op)
8922 && INTVAL (op) > 0
8923 && INTVAL (op) <= 3);
8924 }
8925
8926 return (code == CONST_INT
8927 && INTVAL (index) < 4096
8928 && INTVAL (index) > -256);
8929 }
8930
8931 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8932 static int
8933 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8934 {
8935 int regno;
8936
8937 if (!REG_P (x))
8938 return 0;
8939
8940 regno = REGNO (x);
8941
8942 if (strict_p)
8943 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8944
8945 return (regno <= LAST_LO_REGNUM
8946 || regno > LAST_VIRTUAL_REGISTER
8947 || regno == FRAME_POINTER_REGNUM
8948 || (GET_MODE_SIZE (mode) >= 4
8949 && (regno == STACK_POINTER_REGNUM
8950 || regno >= FIRST_PSEUDO_REGISTER
8951 || x == hard_frame_pointer_rtx
8952 || x == arg_pointer_rtx)));
8953 }
8954
8955 /* Return nonzero if x is a legitimate index register. This is the case
8956 for any base register that can access a QImode object. */
8957 inline static int
8958 thumb1_index_register_rtx_p (rtx x, int strict_p)
8959 {
8960 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8961 }
8962
8963 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8964
8965 The AP may be eliminated to either the SP or the FP, so we use the
8966 least common denominator, e.g. SImode, and offsets from 0 to 64.
8967
8968 ??? Verify whether the above is the right approach.
8969
8970 ??? Also, the FP may be eliminated to the SP, so perhaps that
8971 needs special handling also.
8972
8973 ??? Look at how the mips16 port solves this problem. It probably uses
8974 better ways to solve some of these problems.
8975
8976 Although it is not incorrect, we don't accept QImode and HImode
8977 addresses based on the frame pointer or arg pointer until the
8978 reload pass starts. This is so that eliminating such addresses
8979 into stack-based ones won't produce impossible code. */
8980 int
8981 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8982 {
8983 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8984 return 0;
8985
8986 /* ??? Not clear if this is right. Experiment. */
8987 if (GET_MODE_SIZE (mode) < 4
8988 && !(reload_in_progress || reload_completed)
8989 && (reg_mentioned_p (frame_pointer_rtx, x)
8990 || reg_mentioned_p (arg_pointer_rtx, x)
8991 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8992 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8993 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8994 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8995 return 0;
8996
8997 /* Accept any base register. SP only in SImode or larger. */
8998 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8999 return 1;
9000
9001 /* This is PC relative data before arm_reorg runs. */
9002 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9003 && SYMBOL_REF_P (x)
9004 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9005 && !arm_disable_literal_pool)
9006 return 1;
9007
9008 /* This is PC relative data after arm_reorg runs. */
9009 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9010 && reload_completed
9011 && (LABEL_REF_P (x)
9012 || (GET_CODE (x) == CONST
9013 && GET_CODE (XEXP (x, 0)) == PLUS
9014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9015 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9016 return 1;
9017
9018 /* Post-inc indexing only supported for SImode and larger. */
9019 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9020 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9021 return 1;
9022
9023 else if (GET_CODE (x) == PLUS)
9024 {
9025 /* REG+REG address can be any two index registers. */
9026 /* We disallow FRAME+REG addressing since we know that FRAME
9027 will be replaced with STACK, and SP relative addressing only
9028 permits SP+OFFSET. */
9029 if (GET_MODE_SIZE (mode) <= 4
9030 && XEXP (x, 0) != frame_pointer_rtx
9031 && XEXP (x, 1) != frame_pointer_rtx
9032 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9033 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9034 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9035 return 1;
9036
9037 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
9038 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9039 || XEXP (x, 0) == arg_pointer_rtx)
9040 && CONST_INT_P (XEXP (x, 1))
9041 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9042 return 1;
9043
9044 /* REG+const has a 10-bit offset for SP, but only SImode and
9045 larger are supported. */
9046 /* ??? Should probably check for DI/DFmode overflow here
9047 just like GO_IF_LEGITIMATE_OFFSET does. */
9048 else if (REG_P (XEXP (x, 0))
9049 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9050 && GET_MODE_SIZE (mode) >= 4
9051 && CONST_INT_P (XEXP (x, 1))
9052 && INTVAL (XEXP (x, 1)) >= 0
9053 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9054 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9055 return 1;
9056
9057 else if (REG_P (XEXP (x, 0))
9058 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9059 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9060 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
9061 && REGNO (XEXP (x, 0))
9062 <= LAST_VIRTUAL_POINTER_REGISTER))
9063 && GET_MODE_SIZE (mode) >= 4
9064 && CONST_INT_P (XEXP (x, 1))
9065 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9066 return 1;
9067 }
9068
9069 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9070 && GET_MODE_SIZE (mode) == 4
9071 && SYMBOL_REF_P (x)
9072 && CONSTANT_POOL_ADDRESS_P (x)
9073 && !arm_disable_literal_pool
9074 && ! (flag_pic
9075 && symbol_mentioned_p (get_pool_constant (x))
9076 && ! pcrel_constant_p (get_pool_constant (x))))
9077 return 1;
9078
9079 return 0;
9080 }
9081
9082 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9083 instruction of mode MODE. */
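/* For example: byte accesses allow offsets 0..31, halfword accesses allow
   even offsets 0..62, and word (or larger) accesses allow word-aligned
   offsets up to 128 minus the access size. */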
9084 int
9085 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9086 {
9087 switch (GET_MODE_SIZE (mode))
9088 {
9089 case 1:
9090 return val >= 0 && val < 32;
9091
9092 case 2:
9093 return val >= 0 && val < 64 && (val & 1) == 0;
9094
9095 default:
9096 return (val >= 0
9097 && (val + GET_MODE_SIZE (mode)) <= 128
9098 && (val & 3) == 0);
9099 }
9100 }
9101
9102 bool
9103 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
9104 {
9105 if (TARGET_ARM)
9106 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9107 else if (TARGET_THUMB2)
9108 return thumb2_legitimate_address_p (mode, x, strict_p);
9109 else /* if (TARGET_THUMB1) */
9110 return thumb1_legitimate_address_p (mode, x, strict_p);
9111 }
9112
9113 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9114
9115 Given an rtx X being reloaded into a reg required to be
9116 in class CLASS, return the class of reg to actually use.
9117 In general this is just CLASS, but for the Thumb core registers and
9118 immediate constants we prefer a LO_REGS class or a subset. */
9119
9120 static reg_class_t
9121 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9122 {
9123 if (TARGET_32BIT)
9124 return rclass;
9125 else
9126 {
9127 if (rclass == GENERAL_REGS)
9128 return LO_REGS;
9129 else
9130 return rclass;
9131 }
9132 }
9133
9134 /* Build the SYMBOL_REF for __tls_get_addr. */
9135
9136 static GTY(()) rtx tls_get_addr_libfunc;
9137
9138 static rtx
9139 get_tls_get_addr (void)
9140 {
9141 if (!tls_get_addr_libfunc)
9142 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9143 return tls_get_addr_libfunc;
9144 }
9145
9146 rtx
9147 arm_load_tp (rtx target)
9148 {
9149 if (!target)
9150 target = gen_reg_rtx (SImode);
9151
9152 if (TARGET_HARD_TP)
9153 {
9154 /* Can return in any reg. */
9155 emit_insn (gen_load_tp_hard (target));
9156 }
9157 else
9158 {
9159 /* Always returned in r0. Immediately copy the result into a pseudo,
9160 otherwise other uses of r0 (e.g. setting up function arguments) may
9161 clobber the value. */
9162
9163 rtx tmp;
9164
9165 if (TARGET_FDPIC)
9166 {
9167 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9168 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9169
9170 emit_insn (gen_load_tp_soft_fdpic ());
9171
9172 /* Restore r9. */
9173 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9174 }
9175 else
9176 emit_insn (gen_load_tp_soft ());
9177
9178 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9179 emit_move_insn (target, tmp);
9180 }
9181 return target;
9182 }
9183
9184 static rtx
9185 load_tls_operand (rtx x, rtx reg)
9186 {
9187 rtx tmp;
9188
9189 if (reg == NULL_RTX)
9190 reg = gen_reg_rtx (SImode);
9191
9192 tmp = gen_rtx_CONST (SImode, x);
9193
9194 emit_move_insn (reg, tmp);
9195
9196 return reg;
9197 }
9198
9199 static rtx_insn *
9200 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9201 {
9202 rtx label, labelno = NULL_RTX, sum;
9203
9204 gcc_assert (reloc != TLS_DESCSEQ);
9205 start_sequence ();
9206
9207 if (TARGET_FDPIC)
9208 {
9209 sum = gen_rtx_UNSPEC (Pmode,
9210 gen_rtvec (2, x, GEN_INT (reloc)),
9211 UNSPEC_TLS);
9212 }
9213 else
9214 {
9215 labelno = GEN_INT (pic_labelno++);
9216 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9217 label = gen_rtx_CONST (VOIDmode, label);
9218
9219 sum = gen_rtx_UNSPEC (Pmode,
9220 gen_rtvec (4, x, GEN_INT (reloc), label,
9221 GEN_INT (TARGET_ARM ? 8 : 4)),
9222 UNSPEC_TLS);
9223 }
9224 reg = load_tls_operand (sum, reg);
9225
9226 if (TARGET_FDPIC)
9227 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9228 else if (TARGET_ARM)
9229 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9230 else
9231 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9232
9233 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9234 LCT_PURE, /* LCT_CONST? */
9235 Pmode, reg, Pmode);
9236
9237 rtx_insn *insns = get_insns ();
9238 end_sequence ();
9239
9240 return insns;
9241 }
9242
9243 static rtx
9244 arm_tls_descseq_addr (rtx x, rtx reg)
9245 {
9246 rtx labelno = GEN_INT (pic_labelno++);
9247 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9248 rtx sum = gen_rtx_UNSPEC (Pmode,
9249 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9250 gen_rtx_CONST (VOIDmode, label),
9251 GEN_INT (!TARGET_ARM)),
9252 UNSPEC_TLS);
9253 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9254
9255 emit_insn (gen_tlscall (x, labelno));
9256 if (!reg)
9257 reg = gen_reg_rtx (SImode);
9258 else
9259 gcc_assert (REGNO (reg) != R0_REGNUM);
9260
9261 emit_move_insn (reg, reg0);
9262
9263 return reg;
9264 }
9265
9266
9267 rtx
9268 legitimize_tls_address (rtx x, rtx reg)
9269 {
9270 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9271 rtx_insn *insns;
9272 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9273
9274 switch (model)
9275 {
9276 case TLS_MODEL_GLOBAL_DYNAMIC:
9277 if (TARGET_GNU2_TLS)
9278 {
9279 gcc_assert (!TARGET_FDPIC);
9280
9281 reg = arm_tls_descseq_addr (x, reg);
9282
9283 tp = arm_load_tp (NULL_RTX);
9284
9285 dest = gen_rtx_PLUS (Pmode, tp, reg);
9286 }
9287 else
9288 {
9289 /* Original scheme */
9290 if (TARGET_FDPIC)
9291 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9292 else
9293 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9294 dest = gen_reg_rtx (Pmode);
9295 emit_libcall_block (insns, dest, ret, x);
9296 }
9297 return dest;
9298
9299 case TLS_MODEL_LOCAL_DYNAMIC:
9300 if (TARGET_GNU2_TLS)
9301 {
9302 gcc_assert (!TARGET_FDPIC);
9303
9304 reg = arm_tls_descseq_addr (x, reg);
9305
9306 tp = arm_load_tp (NULL_RTX);
9307
9308 dest = gen_rtx_PLUS (Pmode, tp, reg);
9309 }
9310 else
9311 {
9312 if (TARGET_FDPIC)
9313 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9314 else
9315 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9316
9317 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9318 share the LDM result with other LD model accesses. */
9319 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9320 UNSPEC_TLS);
9321 dest = gen_reg_rtx (Pmode);
9322 emit_libcall_block (insns, dest, ret, eqv);
9323
9324 /* Load the addend. */
9325 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9326 GEN_INT (TLS_LDO32)),
9327 UNSPEC_TLS);
9328 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9329 dest = gen_rtx_PLUS (Pmode, dest, addend);
9330 }
9331 return dest;
9332
9333 case TLS_MODEL_INITIAL_EXEC:
9334 if (TARGET_FDPIC)
9335 {
9336 sum = gen_rtx_UNSPEC (Pmode,
9337 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9338 UNSPEC_TLS);
9339 reg = load_tls_operand (sum, reg);
9340 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9341 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9342 }
9343 else
9344 {
9345 labelno = GEN_INT (pic_labelno++);
9346 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9347 label = gen_rtx_CONST (VOIDmode, label);
9348 sum = gen_rtx_UNSPEC (Pmode,
9349 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9350 GEN_INT (TARGET_ARM ? 8 : 4)),
9351 UNSPEC_TLS);
9352 reg = load_tls_operand (sum, reg);
9353
9354 if (TARGET_ARM)
9355 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9356 else if (TARGET_THUMB2)
9357 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9358 else
9359 {
9360 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9361 emit_move_insn (reg, gen_const_mem (SImode, reg));
9362 }
9363 }
9364
9365 tp = arm_load_tp (NULL_RTX);
9366
9367 return gen_rtx_PLUS (Pmode, tp, reg);
9368
9369 case TLS_MODEL_LOCAL_EXEC:
9370 tp = arm_load_tp (NULL_RTX);
9371
9372 reg = gen_rtx_UNSPEC (Pmode,
9373 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9374 UNSPEC_TLS);
9375 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9376
9377 return gen_rtx_PLUS (Pmode, tp, reg);
9378
9379 default:
9380 abort ();
9381 }
9382 }
9383
9384 /* Try machine-dependent ways of modifying an illegitimate address
9385 to be legitimate. If we find one, return the new, valid address. */
9386 rtx
9387 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9388 {
9389 if (arm_tls_referenced_p (x))
9390 {
9391 rtx addend = NULL;
9392
9393 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9394 {
9395 addend = XEXP (XEXP (x, 0), 1);
9396 x = XEXP (XEXP (x, 0), 0);
9397 }
9398
9399 if (!SYMBOL_REF_P (x))
9400 return x;
9401
9402 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9403
9404 x = legitimize_tls_address (x, NULL_RTX);
9405
9406 if (addend)
9407 {
9408 x = gen_rtx_PLUS (SImode, x, addend);
9409 orig_x = x;
9410 }
9411 else
9412 return x;
9413 }
9414
9415 if (TARGET_THUMB1)
9416 return thumb_legitimize_address (x, orig_x, mode);
9417
9418 if (GET_CODE (x) == PLUS)
9419 {
9420 rtx xop0 = XEXP (x, 0);
9421 rtx xop1 = XEXP (x, 1);
9422
9423 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9424 xop0 = force_reg (SImode, xop0);
9425
9426 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9427 && !symbol_mentioned_p (xop1))
9428 xop1 = force_reg (SImode, xop1);
9429
9430 if (ARM_BASE_REGISTER_RTX_P (xop0)
9431 && CONST_INT_P (xop1))
9432 {
9433 HOST_WIDE_INT n, low_n;
9434 rtx base_reg, val;
9435 n = INTVAL (xop1);
9436
9437 /* VFP addressing modes actually allow greater offsets, but for
9438 now we just stick with the lowest common denominator. */
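/* For example, with DFmode and an offset of 0x10c, the code below rewrites
   the address as (xop0 + 0x110) plus a residual offset of -4, keeping the
   residual offset small. */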
9439 if (mode == DImode || mode == DFmode)
9440 {
9441 low_n = n & 0x0f;
9442 n &= ~0x0f;
9443 if (low_n > 4)
9444 {
9445 n += 16;
9446 low_n -= 16;
9447 }
9448 }
9449 else
9450 {
9451 low_n = ((mode) == TImode ? 0
9452 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9453 n -= low_n;
9454 }
9455
9456 base_reg = gen_reg_rtx (SImode);
9457 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9458 emit_move_insn (base_reg, val);
9459 x = plus_constant (Pmode, base_reg, low_n);
9460 }
9461 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9462 x = gen_rtx_PLUS (SImode, xop0, xop1);
9463 }
9464
9465 /* XXX We don't allow MINUS any more -- see comment in
9466 arm_legitimate_address_outer_p (). */
9467 else if (GET_CODE (x) == MINUS)
9468 {
9469 rtx xop0 = XEXP (x, 0);
9470 rtx xop1 = XEXP (x, 1);
9471
9472 if (CONSTANT_P (xop0))
9473 xop0 = force_reg (SImode, xop0);
9474
9475 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9476 xop1 = force_reg (SImode, xop1);
9477
9478 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9479 x = gen_rtx_MINUS (SImode, xop0, xop1);
9480 }
9481
9482 /* Make sure to take full advantage of the pre-indexed addressing mode
9483 with absolute addresses, which often allows the base register to
9484 be factored out across multiple adjacent memory references, and might
9485 even allow the minipool to be avoided entirely. */
9486 else if (CONST_INT_P (x) && optimize > 0)
9487 {
9488 unsigned int bits;
9489 HOST_WIDE_INT mask, base, index;
9490 rtx base_reg;
9491
9492 /* LDR and LDRB can use a 12-bit index; ldrsb and the rest can
9493 only use an 8-bit index. So let's use a 12-bit index for
9494 SImode only and hope that arm_gen_constant will enable LDRB
9495 to use more bits. */
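/* Illustrative example: for x == 0x3456 in SImode, mask == 0xfff, so
   base == 0x3000 and index == 0x456; the base is loaded into a register
   once and the access becomes [base_reg, #0x456]. */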
9496 bits = (mode == SImode) ? 12 : 8;
9497 mask = (1 << bits) - 1;
9498 base = INTVAL (x) & ~mask;
9499 index = INTVAL (x) & mask;
9500 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9501 {
9502 /* It'll most probably be more efficient to generate the
9503 base with more bits set and use a negative index instead.
9504 Don't do this for Thumb as negative offsets are much more
9505 limited. */
9506 base |= mask;
9507 index -= mask;
9508 }
9509 base_reg = force_reg (SImode, GEN_INT (base));
9510 x = plus_constant (Pmode, base_reg, index);
9511 }
9512
9513 if (flag_pic)
9514 {
9515 /* We need to find and carefully transform any SYMBOL and LABEL
9516 references; so go back to the original address expression. */
9517 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9518 false /*compute_now*/);
9519
9520 if (new_x != orig_x)
9521 x = new_x;
9522 }
9523
9524 return x;
9525 }
9526
9527
9528 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9529 to be legitimate. If we find one, return the new, valid address. */
9530 rtx
9531 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9532 {
9533 if (GET_CODE (x) == PLUS
9534 && CONST_INT_P (XEXP (x, 1))
9535 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9536 || INTVAL (XEXP (x, 1)) < 0))
9537 {
9538 rtx xop0 = XEXP (x, 0);
9539 rtx xop1 = XEXP (x, 1);
9540 HOST_WIDE_INT offset = INTVAL (xop1);
9541
9542 /* Try and fold the offset into a biasing of the base register and
9543 then offsetting that. Don't do this when optimizing for space
9544 since it can cause too many CSEs. */
9545 if (optimize_size && offset >= 0
9546 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9547 {
9548 HOST_WIDE_INT delta;
9549
9550 if (offset >= 256)
9551 delta = offset - (256 - GET_MODE_SIZE (mode));
9552 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9553 delta = 31 * GET_MODE_SIZE (mode);
9554 else
9555 delta = offset & (~31 * GET_MODE_SIZE (mode));
9556
9557 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9558 NULL_RTX);
9559 x = plus_constant (Pmode, xop0, delta);
9560 }
9561 else if (offset < 0 && offset > -256)
9562 /* Small negative offsets are best done with a subtract before the
9563 dereference; forcing these into a register normally takes two
9564 instructions. */
9565 x = force_operand (x, NULL_RTX);
9566 else
9567 {
9568 /* For the remaining cases, force the constant into a register. */
9569 xop1 = force_reg (SImode, xop1);
9570 x = gen_rtx_PLUS (SImode, xop0, xop1);
9571 }
9572 }
9573 else if (GET_CODE (x) == PLUS
9574 && s_register_operand (XEXP (x, 1), SImode)
9575 && !s_register_operand (XEXP (x, 0), SImode))
9576 {
9577 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9578
9579 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9580 }
9581
9582 if (flag_pic)
9583 {
9584 /* We need to find and carefully transform any SYMBOL and LABEL
9585 references; so go back to the original address expression. */
9586 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9587 false /*compute_now*/);
9588
9589 if (new_x != orig_x)
9590 x = new_x;
9591 }
9592
9593 return x;
9594 }
9595
9596 /* Return TRUE if X contains any TLS symbol references. */
9597
9598 bool
9599 arm_tls_referenced_p (rtx x)
9600 {
9601 if (! TARGET_HAVE_TLS)
9602 return false;
9603
9604 subrtx_iterator::array_type array;
9605 FOR_EACH_SUBRTX (iter, array, x, ALL)
9606 {
9607 const_rtx x = *iter;
9608 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9609 {
9610 /* ARM currently does not provide relocations to encode TLS variables
9611 into AArch32 instructions, only data, so there is no way to
9612 implement these if the literal pool is disabled. */
9613 if (arm_disable_literal_pool)
9614 sorry ("accessing thread-local storage is not currently supported "
9615 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9616
9617 return true;
9618 }
9619
9620 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9621 TLS offsets, not real symbol references. */
9622 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9623 iter.skip_subrtxes ();
9624 }
9625 return false;
9626 }
9627
9628 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9629
9630 On the ARM, allow any integer (invalid ones are removed later by insn
9631 patterns), nice doubles and symbol_refs which refer to the function's
9632 constant pool XXX.
9633
9634 When generating pic allow anything. */
9635
9636 static bool
9637 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9638 {
9639 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9640 return false;
9641
9642 return flag_pic || !label_mentioned_p (x);
9643 }
9644
9645 static bool
9646 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9647 {
9648 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9649 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
9650 for ARMv8-M Baseline or later, the result is valid. */
9651 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9652 x = XEXP (x, 0);
9653
9654 return (CONST_INT_P (x)
9655 || CONST_DOUBLE_P (x)
9656 || CONSTANT_ADDRESS_P (x)
9657 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9658 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9659 we build the symbol address with upper/lower
9660 relocations. */
9661 || (TARGET_THUMB1
9662 && !label_mentioned_p (x)
9663 && arm_valid_symbolic_address_p (x)
9664 && arm_disable_literal_pool)
9665 || flag_pic);
9666 }
9667
9668 static bool
9669 arm_legitimate_constant_p (machine_mode mode, rtx x)
9670 {
9671 return (!arm_cannot_force_const_mem (mode, x)
9672 && (TARGET_32BIT
9673 ? arm_legitimate_constant_p_1 (mode, x)
9674 : thumb_legitimate_constant_p (mode, x)));
9675 }
9676
9677 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9678
9679 static bool
9680 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9681 {
9682 rtx base, offset;
9683 split_const (x, &base, &offset);
9684
9685 if (SYMBOL_REF_P (base))
9686 {
9687 /* Function symbols cannot have an offset due to the Thumb bit. */
9688 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9689 && INTVAL (offset) != 0)
9690 return true;
9691
9692 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9693 && !offset_within_block_p (base, INTVAL (offset)))
9694 return true;
9695 }
9696 return arm_tls_referenced_p (x);
9697 }
9698 \f
9699 #define REG_OR_SUBREG_REG(X) \
9700 (REG_P (X) \
9701 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9702
9703 #define REG_OR_SUBREG_RTX(X) \
9704 (REG_P (X) ? (X) : SUBREG_REG (X))
9705
9706 static inline int
9707 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9708 {
9709 machine_mode mode = GET_MODE (x);
9710 int total, words;
9711
9712 switch (code)
9713 {
9714 case ASHIFT:
9715 case ASHIFTRT:
9716 case LSHIFTRT:
9717 case ROTATERT:
9718 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9719
9720 case PLUS:
9721 case MINUS:
9722 case COMPARE:
9723 case NEG:
9724 case NOT:
9725 return COSTS_N_INSNS (1);
9726
9727 case MULT:
9728 if (arm_arch6m && arm_m_profile_small_mul)
9729 return COSTS_N_INSNS (32);
9730
9731 if (CONST_INT_P (XEXP (x, 1)))
9732 {
9733 int cycles = 0;
9734 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9735
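/* The loop below charges one cycle per two bits of the constant
   multiplier; e.g. i == 0x55 needs four iterations, giving
   COSTS_N_INSNS (2) + 4. */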
9736 while (i)
9737 {
9738 i >>= 2;
9739 cycles++;
9740 }
9741 return COSTS_N_INSNS (2) + cycles;
9742 }
9743 return COSTS_N_INSNS (1) + 16;
9744
9745 case SET:
9746 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9747 the mode. */
9748 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9749 return (COSTS_N_INSNS (words)
9750 + 4 * ((MEM_P (SET_SRC (x)))
9751 + MEM_P (SET_DEST (x))));
9752
9753 case CONST_INT:
9754 if (outer == SET)
9755 {
9756 if (UINTVAL (x) < 256
9757 /* 16-bit constant. */
9758 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9759 return 0;
9760 if (thumb_shiftable_const (INTVAL (x)))
9761 return COSTS_N_INSNS (2);
9762 return arm_disable_literal_pool
9763 ? COSTS_N_INSNS (8)
9764 : COSTS_N_INSNS (3);
9765 }
9766 else if ((outer == PLUS || outer == COMPARE)
9767 && INTVAL (x) < 256 && INTVAL (x) > -256)
9768 return 0;
9769 else if ((outer == IOR || outer == XOR || outer == AND)
9770 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9771 return COSTS_N_INSNS (1);
9772 else if (outer == AND)
9773 {
9774 int i;
9775 /* This duplicates the tests in the andsi3 expander. */
9776 for (i = 9; i <= 31; i++)
9777 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9778 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9779 return COSTS_N_INSNS (2);
9780 }
9781 else if (outer == ASHIFT || outer == ASHIFTRT
9782 || outer == LSHIFTRT)
9783 return 0;
9784 return COSTS_N_INSNS (2);
9785
9786 case CONST:
9787 case CONST_DOUBLE:
9788 case LABEL_REF:
9789 case SYMBOL_REF:
9790 return COSTS_N_INSNS (3);
9791
9792 case UDIV:
9793 case UMOD:
9794 case DIV:
9795 case MOD:
9796 return 100;
9797
9798 case TRUNCATE:
9799 return 99;
9800
9801 case AND:
9802 case XOR:
9803 case IOR:
9804 /* XXX guess. */
9805 return 8;
9806
9807 case MEM:
9808 /* XXX another guess. */
9809 /* Memory costs quite a lot for the first word, but subsequent words
9810 load at the equivalent of a single insn each. */
9811 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9812 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9813 ? 4 : 0));
9814
9815 case IF_THEN_ELSE:
9816 /* XXX a guess. */
9817 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9818 return 14;
9819 return 2;
9820
9821 case SIGN_EXTEND:
9822 case ZERO_EXTEND:
9823 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9824 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9825
9826 if (mode == SImode)
9827 return total;
9828
9829 if (arm_arch6)
9830 return total + COSTS_N_INSNS (1);
9831
9832 /* Assume a two-shift sequence. Increase the cost slightly so
9833 we prefer actual shifts over an extend operation. */
9834 return total + 1 + COSTS_N_INSNS (2);
9835
9836 default:
9837 return 99;
9838 }
9839 }
9840
9841 /* Estimates the size cost of thumb1 instructions.
9842 For now most of the code is copied from thumb1_rtx_costs. We need more
9843 fine-grained tuning when we have more related test cases. */
9844 static inline int
9845 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9846 {
9847 machine_mode mode = GET_MODE (x);
9848 int words, cost;
9849
9850 switch (code)
9851 {
9852 case ASHIFT:
9853 case ASHIFTRT:
9854 case LSHIFTRT:
9855 case ROTATERT:
9856 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9857
9858 case PLUS:
9859 case MINUS:
9860 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9861 patterns defined by RTL expansion, especially for the expansion of
9862 multiplication. */
9863 if ((GET_CODE (XEXP (x, 0)) == MULT
9864 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9865 || (GET_CODE (XEXP (x, 1)) == MULT
9866 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9867 return COSTS_N_INSNS (2);
9868 /* Fall through. */
9869 case COMPARE:
9870 case NEG:
9871 case NOT:
9872 return COSTS_N_INSNS (1);
9873
9874 case MULT:
9875 if (CONST_INT_P (XEXP (x, 1)))
9876 {
9877 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9878 into a register first. */
9879 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9880 /* For targets with a very small, high-latency multiply
9881 unit, we prefer to synthesize the mult with up to 5 instructions,
9882 giving a good balance between size and performance. */
9883 if (arm_arch6m && arm_m_profile_small_mul)
9884 return COSTS_N_INSNS (5);
9885 else
9886 return COSTS_N_INSNS (1) + const_size;
9887 }
9888 return COSTS_N_INSNS (1);
9889
9890 case SET:
9891 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9892 the mode. */
9893 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9894 cost = COSTS_N_INSNS (words);
9895 if (satisfies_constraint_J (SET_SRC (x))
9896 || satisfies_constraint_K (SET_SRC (x))
9897 /* Too big an immediate for a 2-byte mov, using MOVT. */
9898 || (CONST_INT_P (SET_SRC (x))
9899 && UINTVAL (SET_SRC (x)) >= 256
9900 && TARGET_HAVE_MOVT
9901 && satisfies_constraint_j (SET_SRC (x)))
9902 /* thumb1_movdi_insn. */
9903 || ((words > 1) && MEM_P (SET_SRC (x))))
9904 cost += COSTS_N_INSNS (1);
9905 return cost;
9906
9907 case CONST_INT:
9908 if (outer == SET)
9909 {
9910 if (UINTVAL (x) < 256)
9911 return COSTS_N_INSNS (1);
9912 /* movw is 4 bytes long. */
9913 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9914 return COSTS_N_INSNS (2);
9915 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9916 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9917 return COSTS_N_INSNS (2);
9918 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9919 if (thumb_shiftable_const (INTVAL (x)))
9920 return COSTS_N_INSNS (2);
9921 return arm_disable_literal_pool
9922 ? COSTS_N_INSNS (8)
9923 : COSTS_N_INSNS (3);
9924 }
9925 else if ((outer == PLUS || outer == COMPARE)
9926 && INTVAL (x) < 256 && INTVAL (x) > -256)
9927 return 0;
9928 else if ((outer == IOR || outer == XOR || outer == AND)
9929 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9930 return COSTS_N_INSNS (1);
9931 else if (outer == AND)
9932 {
9933 int i;
9934 /* This duplicates the tests in the andsi3 expander. */
9935 for (i = 9; i <= 31; i++)
9936 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9937 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9938 return COSTS_N_INSNS (2);
9939 }
9940 else if (outer == ASHIFT || outer == ASHIFTRT
9941 || outer == LSHIFTRT)
9942 return 0;
9943 return COSTS_N_INSNS (2);
9944
9945 case CONST:
9946 case CONST_DOUBLE:
9947 case LABEL_REF:
9948 case SYMBOL_REF:
9949 return COSTS_N_INSNS (3);
9950
9951 case UDIV:
9952 case UMOD:
9953 case DIV:
9954 case MOD:
9955 return 100;
9956
9957 case TRUNCATE:
9958 return 99;
9959
9960 case AND:
9961 case XOR:
9962 case IOR:
9963 return COSTS_N_INSNS (1);
9964
9965 case MEM:
9966 return (COSTS_N_INSNS (1)
9967 + COSTS_N_INSNS (1)
9968 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9969 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9970 ? COSTS_N_INSNS (1) : 0));
9971
9972 case IF_THEN_ELSE:
9973 /* XXX a guess. */
9974 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9975 return 14;
9976 return 2;
9977
9978 case ZERO_EXTEND:
9979 /* XXX still guessing. */
9980 switch (GET_MODE (XEXP (x, 0)))
9981 {
9982 case E_QImode:
9983 return (1 + (mode == DImode ? 4 : 0)
9984 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9985
9986 case E_HImode:
9987 return (4 + (mode == DImode ? 4 : 0)
9988 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9989
9990 case E_SImode:
9991 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9992
9993 default:
9994 return 99;
9995 }
9996
9997 default:
9998 return 99;
9999 }
10000 }
10001
10002 /* Helper function for arm_rtx_costs. If one operand of OP (a
10003 PLUS) adds in the carry flag, then return the other operand. If
10004 neither is a carry, return OP unchanged. */
10005 static rtx
10006 strip_carry_operation (rtx op)
10007 {
10008 gcc_assert (GET_CODE (op) == PLUS);
10009 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10010 return XEXP (op, 1);
10011 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10012 return XEXP (op, 0);
10013 return op;
10014 }
10015
10016 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10017 operand, then return the operand that is being shifted. If the shift
10018 is not by a constant, then set SHIFT_REG to point to the operand.
10019 Return NULL if OP is not a shifter operand. */
10020 static rtx
10021 shifter_op_p (rtx op, rtx *shift_reg)
10022 {
10023 enum rtx_code code = GET_CODE (op);
10024
10025 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10026 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10027 return XEXP (op, 0);
10028 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10029 return XEXP (op, 0);
10030 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10031 || code == ASHIFTRT)
10032 {
10033 if (!CONST_INT_P (XEXP (op, 1)))
10034 *shift_reg = XEXP (op, 1);
10035 return XEXP (op, 0);
10036 }
10037
10038 return NULL;
10039 }
10040
10041 static bool
10042 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10043 {
10044 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10045 rtx_code code = GET_CODE (x);
10046 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10047
10048 switch (XINT (x, 1))
10049 {
10050 case UNSPEC_UNALIGNED_LOAD:
10051 /* We can only do unaligned loads into the integer unit, and we can't
10052 use LDM or LDRD. */
10053 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10054 if (speed_p)
10055 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10056 + extra_cost->ldst.load_unaligned);
10057
10058 #ifdef NOT_YET
10059 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10060 ADDR_SPACE_GENERIC, speed_p);
10061 #endif
10062 return true;
10063
10064 case UNSPEC_UNALIGNED_STORE:
10065 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10066 if (speed_p)
10067 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10068 + extra_cost->ldst.store_unaligned);
10069
10070 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10071 #ifdef NOT_YET
10072 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10073 ADDR_SPACE_GENERIC, speed_p);
10074 #endif
10075 return true;
10076
10077 case UNSPEC_VRINTZ:
10078 case UNSPEC_VRINTP:
10079 case UNSPEC_VRINTM:
10080 case UNSPEC_VRINTR:
10081 case UNSPEC_VRINTX:
10082 case UNSPEC_VRINTA:
10083 if (speed_p)
10084 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10085
10086 return true;
10087 default:
10088 *cost = COSTS_N_INSNS (2);
10089 break;
10090 }
10091 return true;
10092 }
10093
10094 /* Cost of a libcall. We assume one insn per argument, an amount for the
10095 call (one insn for -Os) and then one for processing the result. */
10096 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
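/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size. */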
10097
10098 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10099 do \
10100 { \
10101 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10102 if (shift_op != NULL \
10103 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10104 { \
10105 if (shift_reg) \
10106 { \
10107 if (speed_p) \
10108 *cost += extra_cost->alu.arith_shift_reg; \
10109 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10110 ASHIFT, 1, speed_p); \
10111 } \
10112 else if (speed_p) \
10113 *cost += extra_cost->alu.arith_shift; \
10114 \
10115 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10116 ASHIFT, 0, speed_p) \
10117 + rtx_cost (XEXP (x, 1 - IDX), \
10118 GET_MODE (shift_op), \
10119 OP, 1, speed_p)); \
10120 return true; \
10121 } \
10122 } \
10123 while (0)
10124
10125 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10126 considering the costs of the addressing mode and memory access
10127 separately. */
10128 static bool
10129 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10130 int *cost, bool speed_p)
10131 {
10132 machine_mode mode = GET_MODE (x);
10133
10134 *cost = COSTS_N_INSNS (1);
10135
10136 if (flag_pic
10137 && GET_CODE (XEXP (x, 0)) == PLUS
10138 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10139 /* This will be split into two instructions. Add the cost of the
10140 additional instruction here. The cost of the memory access is computed
10141 below. See arm.md:calculate_pic_address. */
10142 *cost += COSTS_N_INSNS (1);
10143
10144 /* Calculate cost of the addressing mode. */
10145 if (speed_p)
10146 {
10147 arm_addr_mode_op op_type;
10148 switch (GET_CODE (XEXP (x, 0)))
10149 {
10150 default:
10151 case REG:
10152 op_type = AMO_DEFAULT;
10153 break;
10154 case MINUS:
10155 /* MINUS does not appear in RTL, but the architecture supports it,
10156 so handle this case defensively. */
10157 /* fall through */
10158 case PLUS:
10159 op_type = AMO_NO_WB;
10160 break;
10161 case PRE_INC:
10162 case PRE_DEC:
10163 case POST_INC:
10164 case POST_DEC:
10165 case PRE_MODIFY:
10166 case POST_MODIFY:
10167 op_type = AMO_WB;
10168 break;
10169 }
10170
10171 if (VECTOR_MODE_P (mode))
10172 *cost += current_tune->addr_mode_costs->vector[op_type];
10173 else if (FLOAT_MODE_P (mode))
10174 *cost += current_tune->addr_mode_costs->fp[op_type];
10175 else
10176 *cost += current_tune->addr_mode_costs->integer[op_type];
10177 }
10178
10179 /* Calculate cost of memory access. */
10180 if (speed_p)
10181 {
10182 if (FLOAT_MODE_P (mode))
10183 {
10184 if (GET_MODE_SIZE (mode) == 8)
10185 *cost += extra_cost->ldst.loadd;
10186 else
10187 *cost += extra_cost->ldst.loadf;
10188 }
10189 else if (VECTOR_MODE_P (mode))
10190 *cost += extra_cost->ldst.loadv;
10191 else
10192 {
10193 /* Integer modes */
10194 if (GET_MODE_SIZE (mode) == 8)
10195 *cost += extra_cost->ldst.ldrd;
10196 else
10197 *cost += extra_cost->ldst.load;
10198 }
10199 }
10200
10201 return true;
10202 }
10203
10204 /* Helper for arm_bfi_p. */
10205 static bool
10206 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10207 {
10208 unsigned HOST_WIDE_INT const1;
10209 unsigned HOST_WIDE_INT const2 = 0;
10210
10211 if (!CONST_INT_P (XEXP (op0, 1)))
10212 return false;
10213
10214 const1 = UINTVAL (XEXP (op0, 1));
10215 if (!CONST_INT_P (XEXP (op1, 1))
10216 || ~UINTVAL (XEXP (op1, 1)) != const1)
10217 return false;
10218
10219 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10220 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10221 {
10222 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10223 *sub0 = XEXP (XEXP (op0, 0), 0);
10224 }
10225 else
10226 *sub0 = XEXP (op0, 0);
10227
10228 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10229 return false;
10230
10231 *sub1 = XEXP (op1, 0);
10232 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10233 }
10234
10235 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10236 format looks something like:
10237
10238 (IOR (AND (reg1) (~const1))
10239 (AND (ASHIFT (reg2) (const2))
10240 (const1)))
10241
10242 where const1 is a consecutive sequence of 1-bits with the
10243 least-significant non-zero bit starting at bit position const2. If
10244 const2 is zero, then the shift will not appear at all, due to
10245 canonicalization. The two arms of the IOR expression may be
10246 flipped. */
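/* Illustrative example, with const1 == 0xff0 (bits 4..11) and const2 == 4:
     (ior (and (reg r1) (const_int -4081))
          (and (ashift (reg r2) (const_int 4)) (const_int 4080)))
   where -4081 == ~0xff0 and 4080 == 0xff0; this inserts the low 8 bits of
   r2 into bits 4..11 of r1, i.e. BFI r1, r2, #4, #8. */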
10247 static bool
10248 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10249 {
10250 if (GET_CODE (x) != IOR)
10251 return false;
10252 if (GET_CODE (XEXP (x, 0)) != AND
10253 || GET_CODE (XEXP (x, 1)) != AND)
10254 return false;
10255 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10256 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10257 }
10258
10259 /* RTX costs. Make an estimate of the cost of executing the operation
10260 X, which is contained within an operation with code OUTER_CODE.
10261 SPEED_P indicates whether the cost desired is the performance cost,
10262 or the size cost. The estimate is stored in COST and the return
10263 value is TRUE if the cost calculation is final, or FALSE if the
10264 caller should recurse through the operands of X to add additional
10265 costs.
10266
10267 We currently make no attempt to model the size savings of Thumb-2
10268 16-bit instructions. At the normal points in compilation where
10269 this code is called we have no measure of whether the condition
10270 flags are live or not, and thus no realistic way to determine what
10271 the size will eventually be. */
10272 static bool
10273 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10274 const struct cpu_cost_table *extra_cost,
10275 int *cost, bool speed_p)
10276 {
10277 machine_mode mode = GET_MODE (x);
10278
10279 *cost = COSTS_N_INSNS (1);
10280
10281 if (TARGET_THUMB1)
10282 {
10283 if (speed_p)
10284 *cost = thumb1_rtx_costs (x, code, outer_code);
10285 else
10286 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10287 return true;
10288 }
10289
10290 switch (code)
10291 {
10292 case SET:
10293 *cost = 0;
10294 /* SET RTXs don't have a mode so we get it from the destination. */
10295 mode = GET_MODE (SET_DEST (x));
10296
10297 if (REG_P (SET_SRC (x))
10298 && REG_P (SET_DEST (x)))
10299 {
10300 /* Assume that most copies can be done with a single insn,
10301 unless we don't have HW FP, in which case everything
10302 larger than word mode will require two insns. */
10303 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10304 && GET_MODE_SIZE (mode) > 4)
10305 || mode == DImode)
10306 ? 2 : 1);
10307 /* Conditional register moves can be encoded
10308 in 16 bits in Thumb mode. */
10309 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10310 *cost >>= 1;
10311
10312 return true;
10313 }
10314
10315 if (CONST_INT_P (SET_SRC (x)))
10316 {
10317 /* Handle CONST_INT here, since the value doesn't have a mode
10318 and we would otherwise be unable to work out the true cost. */
10319 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10320 0, speed_p);
10321 outer_code = SET;
10322 /* Slightly lower the cost of setting a core reg to a constant.
10323 This helps break up chains and allows for better scheduling. */
10324 if (REG_P (SET_DEST (x))
10325 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10326 *cost -= 1;
10327 x = SET_SRC (x);
10328 /* Immediate moves with an immediate in the range [0, 255] can be
10329 encoded in 16 bits in Thumb mode. */
10330 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10331 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10332 *cost >>= 1;
10333 goto const_int_cost;
10334 }
10335
10336 return false;
10337
10338 case MEM:
10339 return arm_mem_costs (x, extra_cost, cost, speed_p);
10340
10341 case PARALLEL:
10342 {
10343 /* Calculations of LDM costs are complex. We assume an initial cost
10344 (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
10345 registers; then each additional group of
10346 ldm_regs_per_insn_subsequent registers costs one more insn. The
10347 formula for N regs is thus:
10348
10349 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10350 + ldm_regs_per_insn_subsequent - 1)
10351 / ldm_regs_per_insn_subsequent).
10352
10353 Additional costs may also be added for addressing. A similar
10354 formula is used for STM. */
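/* Worked example, assuming ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2: a 6-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2). */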
10355
10356 bool is_ldm = load_multiple_operation (x, SImode);
10357 bool is_stm = store_multiple_operation (x, SImode);
10358
10359 if (is_ldm || is_stm)
10360 {
10361 if (speed_p)
10362 {
10363 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10364 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10365 ? extra_cost->ldst.ldm_regs_per_insn_1st
10366 : extra_cost->ldst.stm_regs_per_insn_1st;
10367 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10368 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10369 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10370
10371 *cost += regs_per_insn_1st
10372 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10373 + regs_per_insn_sub - 1)
10374 / regs_per_insn_sub);
10375 return true;
10376 }
10377
10378 }
10379 return false;
10380 }
10381 case DIV:
10382 case UDIV:
10383 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10384 && (mode == SFmode || !TARGET_VFP_SINGLE))
10385 *cost += COSTS_N_INSNS (speed_p
10386 ? extra_cost->fp[mode != SFmode].div : 0);
10387 else if (mode == SImode && TARGET_IDIV)
10388 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10389 else
10390 *cost = LIBCALL_COST (2);
10391
10392 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10393 are possible, udiv is preferred. */
10394 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10395 return false; /* All arguments must be in registers. */
10396
10397 case MOD:
10398 /* MOD by a power of 2 can be expanded as:
10399 rsbs r1, r0, #0
10400 and r0, r0, #(n - 1)
10401 and r1, r1, #(n - 1)
10402 rsbpl r0, r1, #0. */
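/* The sequence above is four instructions; the base cost set on entry
   already covers one of them, hence the extra COSTS_N_INSNS (3) below.
   For n == 16, for instance, the mask used is #(n - 1) == #15. */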
10403 if (CONST_INT_P (XEXP (x, 1))
10404 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10405 && mode == SImode)
10406 {
10407 *cost += COSTS_N_INSNS (3);
10408
10409 if (speed_p)
10410 *cost += 2 * extra_cost->alu.logical
10411 + extra_cost->alu.arith;
10412 return true;
10413 }
10414
10415 /* Fall-through. */
10416 case UMOD:
10417 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10418 are possible, udiv is preferred. */
10419 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10420 return false; /* All arguments must be in registers. */
10421
10422 case ROTATE:
10423 if (mode == SImode && REG_P (XEXP (x, 1)))
10424 {
10425 *cost += (COSTS_N_INSNS (1)
10426 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10427 if (speed_p)
10428 *cost += extra_cost->alu.shift_reg;
10429 return true;
10430 }
10431 /* Fall through */
10432 case ROTATERT:
10433 case ASHIFT:
10434 case LSHIFTRT:
10435 case ASHIFTRT:
10436 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10437 {
10438 *cost += (COSTS_N_INSNS (2)
10439 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10440 if (speed_p)
10441 *cost += 2 * extra_cost->alu.shift;
10442 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10443 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10444 *cost += 1;
10445 return true;
10446 }
10447 else if (mode == SImode)
10448 {
10449 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10450 /* Slightly disparage register shifts at -Os, but not by much. */
10451 if (!CONST_INT_P (XEXP (x, 1)))
10452 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10453 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10454 return true;
10455 }
10456 else if (GET_MODE_CLASS (mode) == MODE_INT
10457 && GET_MODE_SIZE (mode) < 4)
10458 {
10459 if (code == ASHIFT)
10460 {
10461 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10462 /* Slightly disparage register shifts at -Os, but not by
10463 much. */
10464 if (!CONST_INT_P (XEXP (x, 1)))
10465 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10466 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10467 }
10468 else if (code == LSHIFTRT || code == ASHIFTRT)
10469 {
10470 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10471 {
10472 /* Can use SBFX/UBFX. */
10473 if (speed_p)
10474 *cost += extra_cost->alu.bfx;
10475 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10476 }
10477 else
10478 {
10479 *cost += COSTS_N_INSNS (1);
10480 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10481 if (speed_p)
10482 {
10483 if (CONST_INT_P (XEXP (x, 1)))
10484 *cost += 2 * extra_cost->alu.shift;
10485 else
10486 *cost += (extra_cost->alu.shift
10487 + extra_cost->alu.shift_reg);
10488 }
10489 else
10490 /* Slightly disparage register shifts. */
10491 *cost += !CONST_INT_P (XEXP (x, 1));
10492 }
10493 }
10494 else /* Rotates. */
10495 {
10496 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10497 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10498 if (speed_p)
10499 {
10500 if (CONST_INT_P (XEXP (x, 1)))
10501 *cost += (2 * extra_cost->alu.shift
10502 + extra_cost->alu.log_shift);
10503 else
10504 *cost += (extra_cost->alu.shift
10505 + extra_cost->alu.shift_reg
10506 + extra_cost->alu.log_shift_reg);
10507 }
10508 }
10509 return true;
10510 }
10511
10512 *cost = LIBCALL_COST (2);
10513 return false;
10514
10515 case BSWAP:
10516 if (arm_arch6)
10517 {
10518 if (mode == SImode)
10519 {
10520 if (speed_p)
10521 *cost += extra_cost->alu.rev;
10522
10523 return false;
10524 }
10525 }
10526 else
10527 {
10528 /* No rev instruction available. Look at arm_legacy_rev
10529 and thumb_legacy_rev for the form of RTL used then. */
10530 if (TARGET_THUMB)
10531 {
10532 *cost += COSTS_N_INSNS (9);
10533
10534 if (speed_p)
10535 {
10536 *cost += 6 * extra_cost->alu.shift;
10537 *cost += 3 * extra_cost->alu.logical;
10538 }
10539 }
10540 else
10541 {
10542 *cost += COSTS_N_INSNS (4);
10543
10544 if (speed_p)
10545 {
10546 *cost += 2 * extra_cost->alu.shift;
10547 *cost += extra_cost->alu.arith_shift;
10548 *cost += 2 * extra_cost->alu.logical;
10549 }
10550 }
10551 return true;
10552 }
10553 return false;
10554
10555 case MINUS:
10556 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10557 && (mode == SFmode || !TARGET_VFP_SINGLE))
10558 {
10559 if (GET_CODE (XEXP (x, 0)) == MULT
10560 || GET_CODE (XEXP (x, 1)) == MULT)
10561 {
10562 rtx mul_op0, mul_op1, sub_op;
10563
10564 if (speed_p)
10565 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10566
10567 if (GET_CODE (XEXP (x, 0)) == MULT)
10568 {
10569 mul_op0 = XEXP (XEXP (x, 0), 0);
10570 mul_op1 = XEXP (XEXP (x, 0), 1);
10571 sub_op = XEXP (x, 1);
10572 }
10573 else
10574 {
10575 mul_op0 = XEXP (XEXP (x, 1), 0);
10576 mul_op1 = XEXP (XEXP (x, 1), 1);
10577 sub_op = XEXP (x, 0);
10578 }
10579
10580 /* The first operand of the multiply may be optionally
10581 negated. */
10582 if (GET_CODE (mul_op0) == NEG)
10583 mul_op0 = XEXP (mul_op0, 0);
10584
10585 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10586 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10587 + rtx_cost (sub_op, mode, code, 0, speed_p));
10588
10589 return true;
10590 }
10591
10592 if (speed_p)
10593 *cost += extra_cost->fp[mode != SFmode].addsub;
10594 return false;
10595 }
10596
10597 if (mode == SImode)
10598 {
10599 rtx shift_by_reg = NULL;
10600 rtx shift_op;
10601 rtx non_shift_op;
10602 rtx op0 = XEXP (x, 0);
10603 rtx op1 = XEXP (x, 1);
10604
10605 /* Factor out any borrow operation. There's more than one way
10606 of expressing this; try to recognize them all. */
10607 if (GET_CODE (op0) == MINUS)
10608 {
10609 if (arm_borrow_operation (op1, SImode))
10610 {
10611 op1 = XEXP (op0, 1);
10612 op0 = XEXP (op0, 0);
10613 }
10614 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10615 op0 = XEXP (op0, 0);
10616 }
10617 else if (GET_CODE (op1) == PLUS
10618 && arm_borrow_operation (XEXP (op1, 0), SImode))
10619 op1 = XEXP (op1, 0);
10620 else if (GET_CODE (op0) == NEG
10621 && arm_borrow_operation (op1, SImode))
10622 {
10623 /* Negate with carry-in. For Thumb2 this is done with
10624 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10625 RSC instruction that exists in Arm mode. */
10626 if (speed_p)
10627 *cost += (TARGET_THUMB2
10628 ? extra_cost->alu.arith_shift
10629 : extra_cost->alu.arith);
10630 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10631 return true;
10632 }
10633 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10634 Note we do mean ~borrow here. */
10635 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10636 {
10637 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10638 return true;
10639 }
10640
10641 shift_op = shifter_op_p (op0, &shift_by_reg);
10642 if (shift_op == NULL)
10643 {
10644 shift_op = shifter_op_p (op1, &shift_by_reg);
10645 non_shift_op = op0;
10646 }
10647 else
10648 non_shift_op = op1;
10649
10650 if (shift_op != NULL)
10651 {
10652 if (shift_by_reg != NULL)
10653 {
10654 if (speed_p)
10655 *cost += extra_cost->alu.arith_shift_reg;
10656 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10657 }
10658 else if (speed_p)
10659 *cost += extra_cost->alu.arith_shift;
10660
10661 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10662 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10663 return true;
10664 }
10665
10666 if (arm_arch_thumb2
10667 && GET_CODE (XEXP (x, 1)) == MULT)
10668 {
10669 /* MLS. */
10670 if (speed_p)
10671 *cost += extra_cost->mult[0].add;
10672 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10673 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10674 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10675 return true;
10676 }
10677
10678 if (CONST_INT_P (op0))
10679 {
10680 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10681 INTVAL (op0), NULL_RTX,
10682 NULL_RTX, 1, 0);
10683 *cost = COSTS_N_INSNS (insns);
10684 if (speed_p)
10685 *cost += insns * extra_cost->alu.arith;
10686 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10687 return true;
10688 }
10689 else if (speed_p)
10690 *cost += extra_cost->alu.arith;
10691
10692 /* Don't recurse as we don't want to cost any borrow that
10693 we've stripped. */
10694 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10695 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10696 return true;
10697 }
10698
10699 if (GET_MODE_CLASS (mode) == MODE_INT
10700 && GET_MODE_SIZE (mode) < 4)
10701 {
10702 rtx shift_op, shift_reg;
10703 shift_reg = NULL;
10704
10705 /* We check both sides of the MINUS for shifter operands since,
10706 unlike PLUS, it's not commutative. */
10707
10708 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10709 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10710
10711 /* Slightly disparage, as we might need to widen the result. */
10712 *cost += 1;
10713 if (speed_p)
10714 *cost += extra_cost->alu.arith;
10715
10716 if (CONST_INT_P (XEXP (x, 0)))
10717 {
10718 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10719 return true;
10720 }
10721
10722 return false;
10723 }
10724
10725 if (mode == DImode)
10726 {
10727 *cost += COSTS_N_INSNS (1);
10728
10729 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10730 {
10731 rtx op1 = XEXP (x, 1);
10732
10733 if (speed_p)
10734 *cost += 2 * extra_cost->alu.arith;
10735
10736 if (GET_CODE (op1) == ZERO_EXTEND)
10737 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10738 0, speed_p);
10739 else
10740 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10741 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10742 0, speed_p);
10743 return true;
10744 }
10745 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10746 {
10747 if (speed_p)
10748 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10749 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10750 0, speed_p)
10751 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10752 return true;
10753 }
10754 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10755 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10756 {
10757 if (speed_p)
10758 *cost += (extra_cost->alu.arith
10759 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10760 ? extra_cost->alu.arith
10761 : extra_cost->alu.arith_shift));
10762 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10763 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10764 GET_CODE (XEXP (x, 1)), 0, speed_p));
10765 return true;
10766 }
10767
10768 if (speed_p)
10769 *cost += 2 * extra_cost->alu.arith;
10770 return false;
10771 }
10772
10773 /* Vector mode? */
10774
10775 *cost = LIBCALL_COST (2);
10776 return false;
10777
10778 case PLUS:
10779 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10780 && (mode == SFmode || !TARGET_VFP_SINGLE))
10781 {
10782 if (GET_CODE (XEXP (x, 0)) == MULT)
10783 {
10784 rtx mul_op0, mul_op1, add_op;
10785
10786 if (speed_p)
10787 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10788
10789 mul_op0 = XEXP (XEXP (x, 0), 0);
10790 mul_op1 = XEXP (XEXP (x, 0), 1);
10791 add_op = XEXP (x, 1);
10792
10793 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10794 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10795 + rtx_cost (add_op, mode, code, 0, speed_p));
10796
10797 return true;
10798 }
10799
10800 if (speed_p)
10801 *cost += extra_cost->fp[mode != SFmode].addsub;
10802 return false;
10803 }
10804 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10805 {
10806 *cost = LIBCALL_COST (2);
10807 return false;
10808 }
10809
10810 /* Narrow modes can be synthesized in SImode, but the range
10811 of useful sub-operations is limited. Check for shift operations
10812 on one of the operands. Only left shifts can be used in the
10813 narrow modes. */
10814 if (GET_MODE_CLASS (mode) == MODE_INT
10815 && GET_MODE_SIZE (mode) < 4)
10816 {
10817 rtx shift_op, shift_reg;
10818 shift_reg = NULL;
10819
10820 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10821
10822 if (CONST_INT_P (XEXP (x, 1)))
10823 {
10824 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10825 INTVAL (XEXP (x, 1)), NULL_RTX,
10826 NULL_RTX, 1, 0);
10827 *cost = COSTS_N_INSNS (insns);
10828 if (speed_p)
10829 *cost += insns * extra_cost->alu.arith;
10830 /* Slightly penalize a narrow operation as the result may
10831 need widening. */
10832 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10833 return true;
10834 }
10835
10836 /* Slightly penalize a narrow operation as the result may
10837 need widening. */
10838 *cost += 1;
10839 if (speed_p)
10840 *cost += extra_cost->alu.arith;
10841
10842 return false;
10843 }
10844
10845 if (mode == SImode)
10846 {
10847 rtx shift_op, shift_reg;
10848
10849 if (TARGET_INT_SIMD
10850 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10851 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10852 {
10853 /* UXTA[BH] or SXTA[BH]. */
10854 if (speed_p)
10855 *cost += extra_cost->alu.extend_arith;
10856 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10857 0, speed_p)
10858 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10859 return true;
10860 }
10861
10862 rtx op0 = XEXP (x, 0);
10863 rtx op1 = XEXP (x, 1);
10864
10865 /* Handle a side effect of adding in the carry to an addition. */
10866 if (GET_CODE (op0) == PLUS
10867 && arm_carry_operation (op1, mode))
10868 {
10869 op1 = XEXP (op0, 1);
10870 op0 = XEXP (op0, 0);
10871 }
10872 else if (GET_CODE (op1) == PLUS
10873 && arm_carry_operation (op0, mode))
10874 {
10875 op0 = XEXP (op1, 0);
10876 op1 = XEXP (op1, 1);
10877 }
10878 else if (GET_CODE (op0) == PLUS)
10879 {
10880 op0 = strip_carry_operation (op0);
10881 if (swap_commutative_operands_p (op0, op1))
10882 std::swap (op0, op1);
10883 }
10884
10885 if (arm_carry_operation (op0, mode))
10886 {
10887 /* Adding the carry to a register is a canonicalization of
10888 adding 0 to the register plus the carry. */
10889 if (speed_p)
10890 *cost += extra_cost->alu.arith;
10891 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10892 return true;
10893 }
10894
10895 shift_reg = NULL;
10896 shift_op = shifter_op_p (op0, &shift_reg);
10897 if (shift_op != NULL)
10898 {
10899 if (shift_reg)
10900 {
10901 if (speed_p)
10902 *cost += extra_cost->alu.arith_shift_reg;
10903 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10904 }
10905 else if (speed_p)
10906 *cost += extra_cost->alu.arith_shift;
10907
10908 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10909 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10910 return true;
10911 }
10912
10913 if (GET_CODE (op0) == MULT)
10914 {
10915 rtx mul_op = op0;
10916
10917 if (TARGET_DSP_MULTIPLY
10918 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10919 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10920 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10921 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10922 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10923 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10924 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10925 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10926 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10927 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10928 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10929 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10930 == 16))))))
10931 {
10932 /* SMLA[BT][BT]. */
10933 if (speed_p)
10934 *cost += extra_cost->mult[0].extend_add;
10935 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10936 SIGN_EXTEND, 0, speed_p)
10937 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10938 SIGN_EXTEND, 0, speed_p)
10939 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10940 return true;
10941 }
10942
10943 if (speed_p)
10944 *cost += extra_cost->mult[0].add;
10945 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10946 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10947 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10948 return true;
10949 }
10950
10951 if (CONST_INT_P (op1))
10952 {
10953 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10954 INTVAL (op1), NULL_RTX,
10955 NULL_RTX, 1, 0);
10956 *cost = COSTS_N_INSNS (insns);
10957 if (speed_p)
10958 *cost += insns * extra_cost->alu.arith;
10959 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10960 return true;
10961 }
10962
10963 if (speed_p)
10964 *cost += extra_cost->alu.arith;
10965
10966 /* Don't recurse here because we want to test the operands
10967 without any carry operation. */
10968 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10969 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10970 return true;
10971 }
10972
10973 if (mode == DImode)
10974 {
10975 if (GET_CODE (XEXP (x, 0)) == MULT
10976 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10977 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10978 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10979 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10980 {
10981 if (speed_p)
10982 *cost += extra_cost->mult[1].extend_add;
10983 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10984 ZERO_EXTEND, 0, speed_p)
10985 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10986 ZERO_EXTEND, 0, speed_p)
10987 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10988 return true;
10989 }
10990
10991 *cost += COSTS_N_INSNS (1);
10992
10993 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10994 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10995 {
10996 if (speed_p)
10997 *cost += (extra_cost->alu.arith
10998 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10999 ? extra_cost->alu.arith
11000 : extra_cost->alu.arith_shift));
11001
11002 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11003 0, speed_p)
11004 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11005 return true;
11006 }
11007
11008 if (speed_p)
11009 *cost += 2 * extra_cost->alu.arith;
11010 return false;
11011 }
11012
11013 /* Vector mode? */
11014 *cost = LIBCALL_COST (2);
11015 return false;
11016 case IOR:
11017 {
11018 rtx sub0, sub1;
11019 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11020 {
11021 if (speed_p)
11022 *cost += extra_cost->alu.rev;
11023
11024 return true;
11025 }
11026 else if (mode == SImode && arm_arch_thumb2
11027 && arm_bfi_p (x, &sub0, &sub1))
11028 {
11029 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11030 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11031 if (speed_p)
11032 *cost += extra_cost->alu.bfi;
11033
11034 return true;
11035 }
11036 }
11037
11038 /* Fall through. */
11039 case AND: case XOR:
11040 if (mode == SImode)
11041 {
11042 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11043 rtx op0 = XEXP (x, 0);
11044 rtx shift_op, shift_reg;
11045
11046 if (subcode == NOT
11047 && (code == AND
11048 || (code == IOR && TARGET_THUMB2)))
11049 op0 = XEXP (op0, 0);
11050
11051 shift_reg = NULL;
11052 shift_op = shifter_op_p (op0, &shift_reg);
11053 if (shift_op != NULL)
11054 {
11055 if (shift_reg)
11056 {
11057 if (speed_p)
11058 *cost += extra_cost->alu.log_shift_reg;
11059 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11060 }
11061 else if (speed_p)
11062 *cost += extra_cost->alu.log_shift;
11063
11064 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11065 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11066 return true;
11067 }
11068
11069 if (CONST_INT_P (XEXP (x, 1)))
11070 {
11071 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11072 INTVAL (XEXP (x, 1)), NULL_RTX,
11073 NULL_RTX, 1, 0);
11074
11075 *cost = COSTS_N_INSNS (insns);
11076 if (speed_p)
11077 *cost += insns * extra_cost->alu.logical;
11078 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11079 return true;
11080 }
11081
11082 if (speed_p)
11083 *cost += extra_cost->alu.logical;
11084 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11085 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11086 return true;
11087 }
11088
11089 if (mode == DImode)
11090 {
11091 rtx op0 = XEXP (x, 0);
11092 enum rtx_code subcode = GET_CODE (op0);
11093
11094 *cost += COSTS_N_INSNS (1);
11095
11096 if (subcode == NOT
11097 && (code == AND
11098 || (code == IOR && TARGET_THUMB2)))
11099 op0 = XEXP (op0, 0);
11100
11101 if (GET_CODE (op0) == ZERO_EXTEND)
11102 {
11103 if (speed_p)
11104 *cost += 2 * extra_cost->alu.logical;
11105
11106 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11107 0, speed_p)
11108 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11109 return true;
11110 }
11111 else if (GET_CODE (op0) == SIGN_EXTEND)
11112 {
11113 if (speed_p)
11114 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11115
11116 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11117 0, speed_p)
11118 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11119 return true;
11120 }
11121
11122 if (speed_p)
11123 *cost += 2 * extra_cost->alu.logical;
11124
11125 return true;
11126 }
11127 /* Vector mode? */
11128
11129 *cost = LIBCALL_COST (2);
11130 return false;
11131
11132 case MULT:
11133 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11134 && (mode == SFmode || !TARGET_VFP_SINGLE))
11135 {
11136 rtx op0 = XEXP (x, 0);
11137
11138 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11139 op0 = XEXP (op0, 0);
11140
11141 if (speed_p)
11142 *cost += extra_cost->fp[mode != SFmode].mult;
11143
11144 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11145 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11146 return true;
11147 }
11148 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11149 {
11150 *cost = LIBCALL_COST (2);
11151 return false;
11152 }
11153
11154 if (mode == SImode)
11155 {
11156 if (TARGET_DSP_MULTIPLY
11157 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11158 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11159 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11160 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11161 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11162 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11163 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11164 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11165 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11166 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11167 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11168 && (INTVAL (XEXP (XEXP (x, 1), 1))
11169 == 16))))))
11170 {
11171 /* SMUL[TB][TB]. */
11172 if (speed_p)
11173 *cost += extra_cost->mult[0].extend;
11174 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11175 SIGN_EXTEND, 0, speed_p);
11176 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11177 SIGN_EXTEND, 1, speed_p);
11178 return true;
11179 }
11180 if (speed_p)
11181 *cost += extra_cost->mult[0].simple;
11182 return false;
11183 }
11184
11185 if (mode == DImode)
11186 {
11187 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11188 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11189 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11190 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11191 {
11192 if (speed_p)
11193 *cost += extra_cost->mult[1].extend;
11194 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11195 ZERO_EXTEND, 0, speed_p)
11196 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11197 ZERO_EXTEND, 0, speed_p));
11198 return true;
11199 }
11200
11201 *cost = LIBCALL_COST (2);
11202 return false;
11203 }
11204
11205 /* Vector mode? */
11206 *cost = LIBCALL_COST (2);
11207 return false;
11208
11209 case NEG:
11210 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11211 && (mode == SFmode || !TARGET_VFP_SINGLE))
11212 {
11213 if (GET_CODE (XEXP (x, 0)) == MULT)
11214 {
11215 /* VNMUL. */
11216 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11217 return true;
11218 }
11219
11220 if (speed_p)
11221 *cost += extra_cost->fp[mode != SFmode].neg;
11222
11223 return false;
11224 }
11225 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11226 {
11227 *cost = LIBCALL_COST (1);
11228 return false;
11229 }
11230
11231 if (mode == SImode)
11232 {
11233 if (GET_CODE (XEXP (x, 0)) == ABS)
11234 {
11235 *cost += COSTS_N_INSNS (1);
11236 /* Assume the non-flag-changing variant. */
11237 if (speed_p)
11238 *cost += (extra_cost->alu.log_shift
11239 + extra_cost->alu.arith_shift);
11240 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11241 return true;
11242 }
11243
11244 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11245 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11246 {
11247 *cost += COSTS_N_INSNS (1);
11248 /* No extra cost for MOV imm and MVN imm. */
11249 /* If the comparison op is using the flags, there's no further
11250 cost, otherwise we need to add the cost of the comparison. */
11251 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11252 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11253 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11254 {
11255 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11256 *cost += (COSTS_N_INSNS (1)
11257 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11258 0, speed_p)
11259 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11260 1, speed_p));
11261 if (speed_p)
11262 *cost += extra_cost->alu.arith;
11263 }
11264 return true;
11265 }
11266
11267 if (speed_p)
11268 *cost += extra_cost->alu.arith;
11269 return false;
11270 }
11271
11272 if (GET_MODE_CLASS (mode) == MODE_INT
11273 && GET_MODE_SIZE (mode) < 4)
11274 {
11275 /* Slightly disparage, as we might need an extend operation. */
11276 *cost += 1;
11277 if (speed_p)
11278 *cost += extra_cost->alu.arith;
11279 return false;
11280 }
11281
11282 if (mode == DImode)
11283 {
11284 *cost += COSTS_N_INSNS (1);
11285 if (speed_p)
11286 *cost += 2 * extra_cost->alu.arith;
11287 return false;
11288 }
11289
11290 /* Vector mode? */
11291 *cost = LIBCALL_COST (1);
11292 return false;
11293
11294 case NOT:
11295 if (mode == SImode)
11296 {
11297 rtx shift_op;
11298 rtx shift_reg = NULL;
11299
11300 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11301
11302 if (shift_op)
11303 {
11304 if (shift_reg != NULL)
11305 {
11306 if (speed_p)
11307 *cost += extra_cost->alu.log_shift_reg;
11308 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11309 }
11310 else if (speed_p)
11311 *cost += extra_cost->alu.log_shift;
11312 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11313 return true;
11314 }
11315
11316 if (speed_p)
11317 *cost += extra_cost->alu.logical;
11318 return false;
11319 }
11320 if (mode == DImode)
11321 {
11322 *cost += COSTS_N_INSNS (1);
11323 return false;
11324 }
11325
11326 /* Vector mode? */
11327
11328 *cost += LIBCALL_COST (1);
11329 return false;
11330
11331 case IF_THEN_ELSE:
11332 {
11333 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11334 {
11335 *cost += COSTS_N_INSNS (3);
11336 return true;
11337 }
11338 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11339 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11340
11341 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11342 /* Assume that if one arm of the if_then_else is a register,
11343 it will be tied with the result, eliminating the
11344 conditional insn. */
11345 if (REG_P (XEXP (x, 1)))
11346 *cost += op2cost;
11347 else if (REG_P (XEXP (x, 2)))
11348 *cost += op1cost;
11349 else
11350 {
11351 if (speed_p)
11352 {
11353 if (extra_cost->alu.non_exec_costs_exec)
11354 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11355 else
11356 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11357 }
11358 else
11359 *cost += op1cost + op2cost;
11360 }
11361 }
11362 return true;
11363
11364 case COMPARE:
11365 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11366 *cost = 0;
11367 else
11368 {
11369 machine_mode op0mode;
11370 /* We'll mostly assume that the cost of a compare is the cost of the
11371 LHS. However, there are some notable exceptions. */
11372
11373 /* Floating point compares are never done as side-effects. */
11374 op0mode = GET_MODE (XEXP (x, 0));
11375 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11376 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11377 {
11378 if (speed_p)
11379 *cost += extra_cost->fp[op0mode != SFmode].compare;
11380
11381 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11382 {
11383 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11384 return true;
11385 }
11386
11387 return false;
11388 }
11389 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11390 {
11391 *cost = LIBCALL_COST (2);
11392 return false;
11393 }
11394
11395 /* DImode compares normally take two insns. */
11396 if (op0mode == DImode)
11397 {
11398 *cost += COSTS_N_INSNS (1);
11399 if (speed_p)
11400 *cost += 2 * extra_cost->alu.arith;
11401 return false;
11402 }
11403
11404 if (op0mode == SImode)
11405 {
11406 rtx shift_op;
11407 rtx shift_reg;
11408
11409 if (XEXP (x, 1) == const0_rtx
11410 && !(REG_P (XEXP (x, 0))
11411 || (GET_CODE (XEXP (x, 0)) == SUBREG
11412 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11413 {
11414 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11415
11416 /* Multiply operations that set the flags are often
11417 significantly more expensive. */
11418 if (speed_p
11419 && GET_CODE (XEXP (x, 0)) == MULT
11420 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11421 *cost += extra_cost->mult[0].flag_setting;
11422
11423 if (speed_p
11424 && GET_CODE (XEXP (x, 0)) == PLUS
11425 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11426 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11427 0), 1), mode))
11428 *cost += extra_cost->mult[0].flag_setting;
11429 return true;
11430 }
11431
11432 shift_reg = NULL;
11433 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11434 if (shift_op != NULL)
11435 {
11436 if (shift_reg != NULL)
11437 {
11438 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11439 1, speed_p);
11440 if (speed_p)
11441 *cost += extra_cost->alu.arith_shift_reg;
11442 }
11443 else if (speed_p)
11444 *cost += extra_cost->alu.arith_shift;
11445 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11446 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11447 return true;
11448 }
11449
11450 if (speed_p)
11451 *cost += extra_cost->alu.arith;
11452 if (CONST_INT_P (XEXP (x, 1))
11453 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11454 {
11455 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11456 return true;
11457 }
11458 return false;
11459 }
11460
11461 /* Vector mode? */
11462
11463 *cost = LIBCALL_COST (2);
11464 return false;
11465 }
11466 return true;
11467
11468 case EQ:
11469 case GE:
11470 case GT:
11471 case LE:
11472 case LT:
11473 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11474 vcle and vclt). */
11475 if (TARGET_NEON
11476 && TARGET_HARD_FLOAT
11477 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11478 && (XEXP (x, 1) == CONST0_RTX (mode)))
11479 {
11480 *cost = 0;
11481 return true;
11482 }
11483
11484 /* Fall through. */
11485 case NE:
11486 case LTU:
11487 case LEU:
11488 case GEU:
11489 case GTU:
11490 case ORDERED:
11491 case UNORDERED:
11492 case UNEQ:
11493 case UNLE:
11494 case UNLT:
11495 case UNGE:
11496 case UNGT:
11497 case LTGT:
11498 if (outer_code == SET)
11499 {
11500 /* Is it a store-flag operation? */
11501 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11502 && XEXP (x, 1) == const0_rtx)
11503 {
11504 /* Thumb also needs an IT insn. */
11505 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11506 return true;
11507 }
11508 if (XEXP (x, 1) == const0_rtx)
11509 {
11510 switch (code)
11511 {
11512 case LT:
11513 /* LSR Rd, Rn, #31. */
11514 if (speed_p)
11515 *cost += extra_cost->alu.shift;
11516 break;
11517
11518 case EQ:
11519 /* RSBS T1, Rn, #0
11520 ADC Rd, Rn, T1. */
11521
11522 case NE:
11523 /* SUBS T1, Rn, #1
11524 SBC Rd, Rn, T1. */
11525 *cost += COSTS_N_INSNS (1);
11526 break;
11527
11528 case LE:
11529 /* RSBS T1, Rn, Rn, LSR #31
11530 ADC Rd, Rn, T1. */
11531 *cost += COSTS_N_INSNS (1);
11532 if (speed_p)
11533 *cost += extra_cost->alu.arith_shift;
11534 break;
11535
11536 case GT:
11537 /* RSB Rd, Rn, Rn, ASR #1
11538 LSR Rd, Rd, #31. */
11539 *cost += COSTS_N_INSNS (1);
11540 if (speed_p)
11541 *cost += (extra_cost->alu.arith_shift
11542 + extra_cost->alu.shift);
11543 break;
11544
11545 case GE:
11546 /* ASR Rd, Rn, #31
11547 ADD Rd, Rn, #1. */
11548 *cost += COSTS_N_INSNS (1);
11549 if (speed_p)
11550 *cost += extra_cost->alu.shift;
11551 break;
11552
11553 default:
11554 /* Remaining cases are either meaningless or would take
11555 three insns anyway. */
11556 *cost = COSTS_N_INSNS (3);
11557 break;
11558 }
11559 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11560 return true;
11561 }
11562 else
11563 {
11564 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11565 if (CONST_INT_P (XEXP (x, 1))
11566 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11567 {
11568 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11569 return true;
11570 }
11571
11572 return false;
11573 }
11574 }
11575 /* Not directly inside a set. If it involves the condition code
11576 register it must be the condition for a branch, cond_exec or
11577 I_T_E operation. Since the comparison is performed elsewhere
11578 this is just the control part which has no additional
11579 cost. */
11580 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11581 && XEXP (x, 1) == const0_rtx)
11582 {
11583 *cost = 0;
11584 return true;
11585 }
11586 return false;
11587
11588 case ABS:
11589 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11590 && (mode == SFmode || !TARGET_VFP_SINGLE))
11591 {
11592 if (speed_p)
11593 *cost += extra_cost->fp[mode != SFmode].neg;
11594
11595 return false;
11596 }
11597 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11598 {
11599 *cost = LIBCALL_COST (1);
11600 return false;
11601 }
11602
11603 if (mode == SImode)
11604 {
11605 if (speed_p)
11606 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11607 return false;
11608 }
11609 /* Vector mode? */
11610 *cost = LIBCALL_COST (1);
11611 return false;
11612
11613 case SIGN_EXTEND:
11614 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11615 && MEM_P (XEXP (x, 0)))
11616 {
11617 if (mode == DImode)
11618 *cost += COSTS_N_INSNS (1);
11619
11620 if (!speed_p)
11621 return true;
11622
11623 if (GET_MODE (XEXP (x, 0)) == SImode)
11624 *cost += extra_cost->ldst.load;
11625 else
11626 *cost += extra_cost->ldst.load_sign_extend;
11627
11628 if (mode == DImode)
11629 *cost += extra_cost->alu.shift;
11630
11631 return true;
11632 }
11633
11634 /* Widening from less than 32 bits requires an extend operation. */
11635 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11636 {
11637 /* We have SXTB/SXTH. */
11638 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11639 if (speed_p)
11640 *cost += extra_cost->alu.extend;
11641 }
11642 else if (GET_MODE (XEXP (x, 0)) != SImode)
11643 {
11644 /* Needs two shifts. */
11645 *cost += COSTS_N_INSNS (1);
11646 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11647 if (speed_p)
11648 *cost += 2 * extra_cost->alu.shift;
11649 }
11650
11651 /* Widening beyond 32 bits requires one more insn. */
11652 if (mode == DImode)
11653 {
11654 *cost += COSTS_N_INSNS (1);
11655 if (speed_p)
11656 *cost += extra_cost->alu.shift;
11657 }
11658
11659 return true;
11660
11661 case ZERO_EXTEND:
11662 if ((arm_arch4
11663 || GET_MODE (XEXP (x, 0)) == SImode
11664 || GET_MODE (XEXP (x, 0)) == QImode)
11665 && MEM_P (XEXP (x, 0)))
11666 {
11667 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11668
11669 if (mode == DImode)
11670 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11671
11672 return true;
11673 }
11674
11675 /* Widening from less than 32 bits requires an extend operation. */
11676 if (GET_MODE (XEXP (x, 0)) == QImode)
11677 {
11678 /* UXTB can be a shorter instruction in Thumb2, but it might
11679 be slower than the AND Rd, Rn, #255 alternative. When
11680 optimizing for speed it should never be slower to use
11681 AND, and we don't really model 16-bit vs 32-bit insns
11682 here. */
11683 if (speed_p)
11684 *cost += extra_cost->alu.logical;
11685 }
11686 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11687 {
11688 /* We have UXTB/UXTH. */
11689 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11690 if (speed_p)
11691 *cost += extra_cost->alu.extend;
11692 }
11693 else if (GET_MODE (XEXP (x, 0)) != SImode)
11694 {
11695 /* Needs two shifts. It's marginally preferable to use
11696 shifts rather than two BIC instructions as the second
11697 shift may merge with a subsequent insn as a shifter
11698 op. */
11699 *cost = COSTS_N_INSNS (2);
11700 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11701 if (speed_p)
11702 *cost += 2 * extra_cost->alu.shift;
11703 }
11704
11705 /* Widening beyond 32 bits requires one more insn. */
11706 if (mode == DImode)
11707 {
11708 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11709 }
11710
11711 return true;
11712
11713 case CONST_INT:
11714 *cost = 0;
11715 /* CONST_INT has no mode, so we cannot tell for sure how many
11716 insns are really going to be needed. The best we can do is
11717 look at the value passed. If it fits in SImode, then assume
11718 that's the mode it will be used for. Otherwise assume it
11719 will be used in DImode. */
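/* For instance, a value such as 0x1ffffffff cannot be represented in
   SImode, so it is assumed to be a DImode constant and is costed below
   as two SImode constants: the truncated low word and the value shifted
   right by 32. */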
11720 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11721 mode = SImode;
11722 else
11723 mode = DImode;
11724
11725 /* Avoid blowing up in arm_gen_constant (). */
11726 if (!(outer_code == PLUS
11727 || outer_code == AND
11728 || outer_code == IOR
11729 || outer_code == XOR
11730 || outer_code == MINUS))
11731 outer_code = SET;
11732
11733 const_int_cost:
11734 if (mode == SImode)
11735 {
11736 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11737 INTVAL (x), NULL, NULL,
11738 0, 0));
11739 /* Extra costs? */
11740 }
11741 else
11742 {
11743 *cost += COSTS_N_INSNS (arm_gen_constant
11744 (outer_code, SImode, NULL,
11745 trunc_int_for_mode (INTVAL (x), SImode),
11746 NULL, NULL, 0, 0)
11747 + arm_gen_constant (outer_code, SImode, NULL,
11748 INTVAL (x) >> 32, NULL,
11749 NULL, 0, 0));
11750 /* Extra costs? */
11751 }
11752
11753 return true;
11754
11755 case CONST:
11756 case LABEL_REF:
11757 case SYMBOL_REF:
11758 if (speed_p)
11759 {
11760 if (arm_arch_thumb2 && !flag_pic)
11761 *cost += COSTS_N_INSNS (1);
11762 else
11763 *cost += extra_cost->ldst.load;
11764 }
11765 else
11766 *cost += COSTS_N_INSNS (1);
11767
11768 if (flag_pic)
11769 {
11770 *cost += COSTS_N_INSNS (1);
11771 if (speed_p)
11772 *cost += extra_cost->alu.arith;
11773 }
11774
11775 return true;
11776
11777 case CONST_FIXED:
11778 *cost = COSTS_N_INSNS (4);
11779 /* Fixme. */
11780 return true;
11781
11782 case CONST_DOUBLE:
11783 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11784 && (mode == SFmode || !TARGET_VFP_SINGLE))
11785 {
11786 if (vfp3_const_double_rtx (x))
11787 {
11788 if (speed_p)
11789 *cost += extra_cost->fp[mode == DFmode].fpconst;
11790 return true;
11791 }
11792
11793 if (speed_p)
11794 {
11795 if (mode == DFmode)
11796 *cost += extra_cost->ldst.loadd;
11797 else
11798 *cost += extra_cost->ldst.loadf;
11799 }
11800 else
11801 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11802
11803 return true;
11804 }
11805 *cost = COSTS_N_INSNS (4);
11806 return true;
11807
11808 case CONST_VECTOR:
11809 /* Fixme. */
11810 if (((TARGET_NEON && TARGET_HARD_FLOAT
11811 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11812 || TARGET_HAVE_MVE)
11813 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11814 *cost = COSTS_N_INSNS (1);
11815 else
11816 *cost = COSTS_N_INSNS (4);
11817 return true;
11818
11819 case HIGH:
11820 case LO_SUM:
11821 /* When optimizing for size, we prefer constant pool entries to
11822 MOVW/MOVT pairs, so bump the cost of these slightly. */
11823 if (!speed_p)
11824 *cost += 1;
11825 return true;
11826
11827 case CLZ:
11828 if (speed_p)
11829 *cost += extra_cost->alu.clz;
11830 return false;
11831
11832 case SMIN:
11833 if (XEXP (x, 1) == const0_rtx)
11834 {
11835 if (speed_p)
11836 *cost += extra_cost->alu.log_shift;
11837 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11838 return true;
11839 }
11840 /* Fall through. */
11841 case SMAX:
11842 case UMIN:
11843 case UMAX:
11844 *cost += COSTS_N_INSNS (1);
11845 return false;
11846
11847 case TRUNCATE:
11848 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11849 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11850 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11852 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11853 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11854 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11855 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11856 == ZERO_EXTEND))))
11857 {
11858 if (speed_p)
11859 *cost += extra_cost->mult[1].extend;
11860 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11861 ZERO_EXTEND, 0, speed_p)
11862 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11863 ZERO_EXTEND, 0, speed_p));
11864 return true;
11865 }
11866 *cost = LIBCALL_COST (1);
11867 return false;
11868
11869 case UNSPEC_VOLATILE:
11870 case UNSPEC:
11871 return arm_unspec_cost (x, outer_code, speed_p, cost);
11872
11873 case PC:
11874 /* Reading the PC is like reading any other register. Writing it
11875 is more expensive, but we take that into account elsewhere. */
11876 *cost = 0;
11877 return true;
11878
11879 case ZERO_EXTRACT:
11880 /* TODO: Simple zero_extract of bottom bits using AND. */
11881 /* Fall through. */
11882 case SIGN_EXTRACT:
11883 if (arm_arch6
11884 && mode == SImode
11885 && CONST_INT_P (XEXP (x, 1))
11886 && CONST_INT_P (XEXP (x, 2)))
11887 {
11888 if (speed_p)
11889 *cost += extra_cost->alu.bfx;
11890 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11891 return true;
11892 }
11893 /* Without UBFX/SBFX, need to resort to shift operations. */
11894 *cost += COSTS_N_INSNS (1);
11895 if (speed_p)
11896 *cost += 2 * extra_cost->alu.shift;
11897 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11898 return true;
11899
11900 case FLOAT_EXTEND:
11901 if (TARGET_HARD_FLOAT)
11902 {
11903 if (speed_p)
11904 *cost += extra_cost->fp[mode == DFmode].widen;
11905 if (!TARGET_VFP5
11906 && GET_MODE (XEXP (x, 0)) == HFmode)
11907 {
11908 /* Pre v8, widening HF->DF is a two-step process, first
11909 widening to SFmode. */
11910 *cost += COSTS_N_INSNS (1);
11911 if (speed_p)
11912 *cost += extra_cost->fp[0].widen;
11913 }
11914 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11915 return true;
11916 }
11917
11918 *cost = LIBCALL_COST (1);
11919 return false;
11920
11921 case FLOAT_TRUNCATE:
11922 if (TARGET_HARD_FLOAT)
11923 {
11924 if (speed_p)
11925 *cost += extra_cost->fp[mode == DFmode].narrow;
11926 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11927 return true;
11928 /* Vector modes? */
11929 }
11930 *cost = LIBCALL_COST (1);
11931 return false;
11932
11933 case FMA:
11934 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11935 {
11936 rtx op0 = XEXP (x, 0);
11937 rtx op1 = XEXP (x, 1);
11938 rtx op2 = XEXP (x, 2);
11939
11940
11941 /* vfms or vfnma. */
11942 if (GET_CODE (op0) == NEG)
11943 op0 = XEXP (op0, 0);
11944
11945 /* vfnms or vfnma. */
11946 if (GET_CODE (op2) == NEG)
11947 op2 = XEXP (op2, 0);
11948
11949 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11950 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11951 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11952
11953 if (speed_p)
11954 *cost += extra_cost->fp[mode == DFmode].fma;
11955
11956 return true;
11957 }
11958
11959 *cost = LIBCALL_COST (3);
11960 return false;
11961
11962 case FIX:
11963 case UNSIGNED_FIX:
11964 if (TARGET_HARD_FLOAT)
11965 {
11966 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11967 a vcvt fixed-point conversion. */
11968 if (code == FIX && mode == SImode
11969 && GET_CODE (XEXP (x, 0)) == FIX
11970 && GET_MODE (XEXP (x, 0)) == SFmode
11971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11972 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11973 > 0)
11974 {
11975 if (speed_p)
11976 *cost += extra_cost->fp[0].toint;
11977
11978 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11979 code, 0, speed_p);
11980 return true;
11981 }
11982
11983 if (GET_MODE_CLASS (mode) == MODE_INT)
11984 {
11985 mode = GET_MODE (XEXP (x, 0));
11986 if (speed_p)
11987 *cost += extra_cost->fp[mode == DFmode].toint;
11988 /* Strip off the 'cost' of rounding towards zero. */
11989 if (GET_CODE (XEXP (x, 0)) == FIX)
11990 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11991 0, speed_p);
11992 else
11993 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11994 /* ??? Increase the cost to deal with transferring from
11995 FP -> CORE registers? */
11996 return true;
11997 }
11998 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11999 && TARGET_VFP5)
12000 {
12001 if (speed_p)
12002 *cost += extra_cost->fp[mode == DFmode].roundint;
12003 return false;
12004 }
12005 /* Vector costs? */
12006 }
12007 *cost = LIBCALL_COST (1);
12008 return false;
12009
12010 case FLOAT:
12011 case UNSIGNED_FLOAT:
12012 if (TARGET_HARD_FLOAT)
12013 {
12014 /* ??? Increase the cost to deal with transferring from CORE
12015 -> FP registers? */
12016 if (speed_p)
12017 *cost += extra_cost->fp[mode == DFmode].fromint;
12018 return false;
12019 }
12020 *cost = LIBCALL_COST (1);
12021 return false;
12022
12023 case CALL:
12024 return true;
12025
12026 case ASM_OPERANDS:
12027 {
12028 /* Just a guess. Guess number of instructions in the asm
12029 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12030 though (see PR60663). */
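/* For example, under this estimate a two-instruction asm template with
   three inputs is costed as COSTS_N_INSNS (5). */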
12031 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12032 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12033
12034 *cost = COSTS_N_INSNS (asm_length + num_operands);
12035 return true;
12036 }
12037 default:
12038 if (mode != VOIDmode)
12039 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12040 else
12041 *cost = COSTS_N_INSNS (4); /* Who knows? */
12042 return false;
12043 }
12044 }
12045
12046 #undef HANDLE_NARROW_SHIFT_ARITH
12047
12048 /* RTX costs entry point. */
12049
12050 static bool
12051 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12052 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12053 {
12054 bool result;
12055 int code = GET_CODE (x);
12056 gcc_assert (current_tune->insn_extra_cost);
12057
12058 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12059 (enum rtx_code) outer_code,
12060 current_tune->insn_extra_cost,
12061 total, speed);
12062
12063 if (dump_file && arm_verbose_cost)
12064 {
12065 print_rtl_single (dump_file, x);
12066 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12067 *total, result ? "final" : "partial");
12068 }
12069 return result;
12070 }
12071
12072 static int
12073 arm_insn_cost (rtx_insn *insn, bool speed)
12074 {
12075 int cost;
12076
12077 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12078 will likely disappear during register allocation. */
12079 if (!reload_completed
12080 && GET_CODE (PATTERN (insn)) == SET
12081 && REG_P (SET_DEST (PATTERN (insn)))
12082 && REG_P (SET_SRC (PATTERN (insn))))
12083 return 2;
12084 cost = pattern_cost (PATTERN (insn), speed);
12085 /* If the cost is zero, then it's likely a complex insn. We don't want the
12086 cost of these to be less than something we know about. */
12087 return cost ? cost : COSTS_N_INSNS (2);
12088 }
12089
12090 /* All address computations that can be done are free, but rtx cost returns
12091 the same for practically all of them. So we weight the different types
12092 of address here in the order (most preferred first):
12093 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
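/* For example, a reg-plus-constant address such as [r0, #4] scores 2
   below, a sum involving a shifted operand scores 3, a plain reg+reg
   sum scores 4, and a bare SYMBOL_REF or LABEL_REF scores 10, so
   auto-increment and reg+offset forms are preferred. */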
12094 static inline int
12095 arm_arm_address_cost (rtx x)
12096 {
12097 enum rtx_code c = GET_CODE (x);
12098
12099 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12100 return 0;
12101 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12102 return 10;
12103
12104 if (c == PLUS)
12105 {
12106 if (CONST_INT_P (XEXP (x, 1)))
12107 return 2;
12108
12109 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12110 return 3;
12111
12112 return 4;
12113 }
12114
12115 return 6;
12116 }
12117
12118 static inline int
12119 arm_thumb_address_cost (rtx x)
12120 {
12121 enum rtx_code c = GET_CODE (x);
12122
12123 if (c == REG)
12124 return 1;
12125 if (c == PLUS
12126 && REG_P (XEXP (x, 0))
12127 && CONST_INT_P (XEXP (x, 1)))
12128 return 1;
12129
12130 return 2;
12131 }
12132
12133 static int
12134 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12135 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12136 {
12137 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12138 }
12139
12140 /* Adjust cost hook for XScale. */
12141 static bool
12142 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12143 int * cost)
12144 {
12145 /* Some true dependencies can have a higher cost depending
12146 on precisely how certain input operands are used. */
12147 if (dep_type == 0
12148 && recog_memoized (insn) >= 0
12149 && recog_memoized (dep) >= 0)
12150 {
12151 int shift_opnum = get_attr_shift (insn);
12152 enum attr_type attr_type = get_attr_type (dep);
12153
12154 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12155 operand for INSN. If we have a shifted input operand and the
12156 instruction we depend on is another ALU instruction, then we may
12157 have to account for an additional stall. */
12158 if (shift_opnum != 0
12159 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12160 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12161 || attr_type == TYPE_ALUS_SHIFT_IMM
12162 || attr_type == TYPE_LOGIC_SHIFT_IMM
12163 || attr_type == TYPE_LOGICS_SHIFT_IMM
12164 || attr_type == TYPE_ALU_SHIFT_REG
12165 || attr_type == TYPE_ALUS_SHIFT_REG
12166 || attr_type == TYPE_LOGIC_SHIFT_REG
12167 || attr_type == TYPE_LOGICS_SHIFT_REG
12168 || attr_type == TYPE_MOV_SHIFT
12169 || attr_type == TYPE_MVN_SHIFT
12170 || attr_type == TYPE_MOV_SHIFT_REG
12171 || attr_type == TYPE_MVN_SHIFT_REG))
12172 {
12173 rtx shifted_operand;
12174 int opno;
12175
12176 /* Get the shifted operand. */
12177 extract_insn (insn);
12178 shifted_operand = recog_data.operand[shift_opnum];
12179
12180 /* Iterate over all the operands in DEP. If we write an operand
12181 that overlaps with SHIFTED_OPERAND, then we have to increase the
12182 cost of this dependency. */
12183 extract_insn (dep);
12184 preprocess_constraints (dep);
12185 for (opno = 0; opno < recog_data.n_operands; opno++)
12186 {
12187 /* We can ignore strict inputs. */
12188 if (recog_data.operand_type[opno] == OP_IN)
12189 continue;
12190
12191 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12192 shifted_operand))
12193 {
12194 *cost = 2;
12195 return false;
12196 }
12197 }
12198 }
12199 }
12200 return true;
12201 }
12202
12203 /* Adjust cost hook for Cortex A9. */
12204 static bool
12205 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12206 int * cost)
12207 {
12208 switch (dep_type)
12209 {
12210 case REG_DEP_ANTI:
12211 *cost = 0;
12212 return false;
12213
12214 case REG_DEP_TRUE:
12215 case REG_DEP_OUTPUT:
12216 if (recog_memoized (insn) >= 0
12217 && recog_memoized (dep) >= 0)
12218 {
12219 if (GET_CODE (PATTERN (insn)) == SET)
12220 {
12221 if (GET_MODE_CLASS
12222 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12223 || GET_MODE_CLASS
12224 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12225 {
12226 enum attr_type attr_type_insn = get_attr_type (insn);
12227 enum attr_type attr_type_dep = get_attr_type (dep);
12228
12229 /* By default all dependencies of the form
12230 s0 = s0 <op> s1
12231 s0 = s0 <op> s2
12232 have an extra latency of 1 cycle because
12233 of the input and output dependency in this
12234 case. However, this gets modeled as a true
12235 dependency and hence all these checks. */
12236 if (REG_P (SET_DEST (PATTERN (insn)))
12237 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12238 {
12239 /* FMACS is a special case where the dependent
12240 instruction can be issued 3 cycles before
12241 the normal latency in case of an output
12242 dependency. */
12243 if ((attr_type_insn == TYPE_FMACS
12244 || attr_type_insn == TYPE_FMACD)
12245 && (attr_type_dep == TYPE_FMACS
12246 || attr_type_dep == TYPE_FMACD))
12247 {
12248 if (dep_type == REG_DEP_OUTPUT)
12249 *cost = insn_default_latency (dep) - 3;
12250 else
12251 *cost = insn_default_latency (dep);
12252 return false;
12253 }
12254 else
12255 {
12256 if (dep_type == REG_DEP_OUTPUT)
12257 *cost = insn_default_latency (dep) + 1;
12258 else
12259 *cost = insn_default_latency (dep);
12260 }
12261 return false;
12262 }
12263 }
12264 }
12265 }
12266 break;
12267
12268 default:
12269 gcc_unreachable ();
12270 }
12271
12272 return true;
12273 }
12274
12275 /* Adjust cost hook for FA726TE. */
12276 static bool
12277 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12278 int * cost)
12279 {
12280 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12281 has a penalty of 3. */
12282 if (dep_type == REG_DEP_TRUE
12283 && recog_memoized (insn) >= 0
12284 && recog_memoized (dep) >= 0
12285 && get_attr_conds (dep) == CONDS_SET)
12286 {
12287 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12288 if (get_attr_conds (insn) == CONDS_USE
12289 && get_attr_type (insn) != TYPE_BRANCH)
12290 {
12291 *cost = 3;
12292 return false;
12293 }
12294
12295 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12296 || get_attr_conds (insn) == CONDS_USE)
12297 {
12298 *cost = 0;
12299 return false;
12300 }
12301 }
12302
12303 return true;
12304 }
12305
12306 /* Implement TARGET_REGISTER_MOVE_COST.
12307
12308 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12309 such a move is typically more expensive than a single memory access. We set
12310 the cost to less than two memory accesses so that floating
12311 point to integer conversion does not go through memory. */
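/* For example, a VFP<->core transfer below returns 15, which is less
   than the 2 * 10 that two memory accesses would cost under
   arm_memory_move_cost for TARGET_32BIT, so such transfers stay in
   registers. */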
12312
12313 int
12314 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12315 reg_class_t from, reg_class_t to)
12316 {
12317 if (TARGET_32BIT)
12318 {
12319 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12320 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12321 return 15;
12322 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12323 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12324 return 4;
12325 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12326 return 20;
12327 else
12328 return 2;
12329 }
12330 else
12331 {
12332 if (from == HI_REGS || to == HI_REGS)
12333 return 4;
12334 else
12335 return 2;
12336 }
12337 }
12338
12339 /* Implement TARGET_MEMORY_MOVE_COST. */
12340
12341 int
12342 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12343 bool in ATTRIBUTE_UNUSED)
12344 {
12345 if (TARGET_32BIT)
12346 return 10;
12347 else
12348 {
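/* Thumb: sub-word values cost a flat 8; otherwise the cost scales with
   the mode size and doubles outside LO_REGS, e.g. SImode costs 8 from
   LO_REGS and 16 otherwise. */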
12349 if (GET_MODE_SIZE (mode) < 4)
12350 return 8;
12351 else
12352 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12353 }
12354 }
12355
12356 /* Vectorizer cost model implementation. */
12357
12358 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12359 static int
12360 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12361 tree vectype,
12362 int misalign ATTRIBUTE_UNUSED)
12363 {
12364 unsigned elements;
12365
12366 switch (type_of_cost)
12367 {
12368 case scalar_stmt:
12369 return current_tune->vec_costs->scalar_stmt_cost;
12370
12371 case scalar_load:
12372 return current_tune->vec_costs->scalar_load_cost;
12373
12374 case scalar_store:
12375 return current_tune->vec_costs->scalar_store_cost;
12376
12377 case vector_stmt:
12378 return current_tune->vec_costs->vec_stmt_cost;
12379
12380 case vector_load:
12381 return current_tune->vec_costs->vec_align_load_cost;
12382
12383 case vector_store:
12384 return current_tune->vec_costs->vec_store_cost;
12385
12386 case vec_to_scalar:
12387 return current_tune->vec_costs->vec_to_scalar_cost;
12388
12389 case scalar_to_vec:
12390 return current_tune->vec_costs->scalar_to_vec_cost;
12391
12392 case unaligned_load:
12393 case vector_gather_load:
12394 return current_tune->vec_costs->vec_unalign_load_cost;
12395
12396 case unaligned_store:
12397 case vector_scatter_store:
12398 return current_tune->vec_costs->vec_unalign_store_cost;
12399
12400 case cond_branch_taken:
12401 return current_tune->vec_costs->cond_taken_branch_cost;
12402
12403 case cond_branch_not_taken:
12404 return current_tune->vec_costs->cond_not_taken_branch_cost;
12405
12406 case vec_perm:
12407 case vec_promote_demote:
12408 return current_tune->vec_costs->vec_stmt_cost;
12409
12410 case vec_construct:
12411 elements = TYPE_VECTOR_SUBPARTS (vectype);
12412 return elements / 2 + 1;
12413
12414 default:
12415 gcc_unreachable ();
12416 }
12417 }
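/* Worked example (illustrative, not part of the tuning tables): a
   vec_construct of a 4-element vector has elements = 4, so the cost computed
   above is 4/2 + 1 = 3; a 16-element constructor costs 16/2 + 1 = 9.  */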
12418
12419 /* Return true if and only if this insn can dual-issue only as older. */
12420 static bool
12421 cortexa7_older_only (rtx_insn *insn)
12422 {
12423 if (recog_memoized (insn) < 0)
12424 return false;
12425
12426 switch (get_attr_type (insn))
12427 {
12428 case TYPE_ALU_DSP_REG:
12429 case TYPE_ALU_SREG:
12430 case TYPE_ALUS_SREG:
12431 case TYPE_LOGIC_REG:
12432 case TYPE_LOGICS_REG:
12433 case TYPE_ADC_REG:
12434 case TYPE_ADCS_REG:
12435 case TYPE_ADR:
12436 case TYPE_BFM:
12437 case TYPE_REV:
12438 case TYPE_MVN_REG:
12439 case TYPE_SHIFT_IMM:
12440 case TYPE_SHIFT_REG:
12441 case TYPE_LOAD_BYTE:
12442 case TYPE_LOAD_4:
12443 case TYPE_STORE_4:
12444 case TYPE_FFARITHS:
12445 case TYPE_FADDS:
12446 case TYPE_FFARITHD:
12447 case TYPE_FADDD:
12448 case TYPE_FMOV:
12449 case TYPE_F_CVT:
12450 case TYPE_FCMPS:
12451 case TYPE_FCMPD:
12452 case TYPE_FCONSTS:
12453 case TYPE_FCONSTD:
12454 case TYPE_FMULS:
12455 case TYPE_FMACS:
12456 case TYPE_FMULD:
12457 case TYPE_FMACD:
12458 case TYPE_FDIVS:
12459 case TYPE_FDIVD:
12460 case TYPE_F_MRC:
12461 case TYPE_F_MRRC:
12462 case TYPE_F_FLAG:
12463 case TYPE_F_LOADS:
12464 case TYPE_F_STORES:
12465 return true;
12466 default:
12467 return false;
12468 }
12469 }
12470
12471 /* Return true if and only if this insn can dual-issue as younger. */
12472 static bool
12473 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12474 {
12475 if (recog_memoized (insn) < 0)
12476 {
12477 if (verbose > 5)
12478 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12479 return false;
12480 }
12481
12482 switch (get_attr_type (insn))
12483 {
12484 case TYPE_ALU_IMM:
12485 case TYPE_ALUS_IMM:
12486 case TYPE_LOGIC_IMM:
12487 case TYPE_LOGICS_IMM:
12488 case TYPE_EXTEND:
12489 case TYPE_MVN_IMM:
12490 case TYPE_MOV_IMM:
12491 case TYPE_MOV_REG:
12492 case TYPE_MOV_SHIFT:
12493 case TYPE_MOV_SHIFT_REG:
12494 case TYPE_BRANCH:
12495 case TYPE_CALL:
12496 return true;
12497 default:
12498 return false;
12499 }
12500 }
12501
12502
12503 /* Look for an instruction that can dual issue only as an older
12504 instruction, and move it in front of any instructions that can
12505 dual-issue as younger, while preserving the relative order of all
12506 other instructions in the ready list. This is a heuristic to help
12507 dual-issue in later cycles, by postponing issue of more flexible
12508 instructions. This heuristic may affect dual issue opportunities
12509 in the current cycle. */
12510 static void
12511 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12512 int *n_readyp, int clock)
12513 {
12514 int i;
12515 int first_older_only = -1, first_younger = -1;
12516
12517 if (verbose > 5)
12518 fprintf (file,
12519 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12520 clock,
12521 *n_readyp);
12522
12523 /* Traverse the ready list from the head (the instruction to issue
12524 first), looking for the first instruction that can issue as
12525 younger and the first instruction that can dual-issue only as
12526 older. */
12527 for (i = *n_readyp - 1; i >= 0; i--)
12528 {
12529 rtx_insn *insn = ready[i];
12530 if (cortexa7_older_only (insn))
12531 {
12532 first_older_only = i;
12533 if (verbose > 5)
12534 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12535 break;
12536 }
12537 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12538 first_younger = i;
12539 }
12540
12541 /* Nothing to reorder because either no younger insn found or insn
12542 that can dual-issue only as older appears before any insn that
12543 can dual-issue as younger. */
12544 if (first_younger == -1)
12545 {
12546 if (verbose > 5)
12547 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12548 return;
12549 }
12550
12551 /* Nothing to reorder because no older-only insn in the ready list. */
12552 if (first_older_only == -1)
12553 {
12554 if (verbose > 5)
12555 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12556 return;
12557 }
12558
12559 /* Move first_older_only insn before first_younger. */
12560 if (verbose > 5)
12561 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12562 INSN_UID(ready [first_older_only]),
12563 INSN_UID(ready [first_younger]));
12564 rtx_insn *first_older_only_insn = ready [first_older_only];
12565 for (i = first_older_only; i < first_younger; i++)
12566 {
12567 ready[i] = ready[i+1];
12568 }
12569
12570 ready[i] = first_older_only_insn;
12571 return;
12572 }
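/* Worked example (illustrative): suppose the ready list is { O, X, Y }, where
   index 2 (the head, issued first) holds Y which can issue as younger, X is in
   neither class, and O at index 0 is older-only.  The scan above finds
   first_younger = 2 and first_older_only = 0, and the rotation produces
   { X, Y, O }, so the older-only insn O is issued first while the relative
   order of X and Y is preserved.  */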
12573
12574 /* Implement TARGET_SCHED_REORDER. */
12575 static int
12576 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12577 int clock)
12578 {
12579 switch (arm_tune)
12580 {
12581 case TARGET_CPU_cortexa7:
12582 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12583 break;
12584 default:
12585 /* Do nothing for other cores. */
12586 break;
12587 }
12588
12589 return arm_issue_rate ();
12590 }
12591
12592 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12593 It corrects the value of COST based on the relationship between
12594 INSN and DEP through the dependence LINK. It returns the new
12595 value. There is a per-core adjust_cost hook to adjust scheduler costs
12596 and the per-core hook can choose to completely override the generic
12597 adjust_cost function. Only put bits of code into arm_adjust_cost that
12598 are common across all cores. */
12599 static int
12600 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12601 unsigned int)
12602 {
12603 rtx i_pat, d_pat;
12604
12605 /* When generating Thumb-1 code, we want to place flag-setting operations
12606 close to a conditional branch which depends on them, so that we can
12607 omit the comparison. */
12608 if (TARGET_THUMB1
12609 && dep_type == 0
12610 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12611 && recog_memoized (dep) >= 0
12612 && get_attr_conds (dep) == CONDS_SET)
12613 return 0;
12614
12615 if (current_tune->sched_adjust_cost != NULL)
12616 {
12617 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12618 return cost;
12619 }
12620
12621 /* XXX Is this strictly true? */
12622 if (dep_type == REG_DEP_ANTI
12623 || dep_type == REG_DEP_OUTPUT)
12624 return 0;
12625
12626 /* Call insns don't incur a stall, even if they follow a load. */
12627 if (dep_type == 0
12628 && CALL_P (insn))
12629 return 1;
12630
12631 if ((i_pat = single_set (insn)) != NULL
12632 && MEM_P (SET_SRC (i_pat))
12633 && (d_pat = single_set (dep)) != NULL
12634 && MEM_P (SET_DEST (d_pat)))
12635 {
12636 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12637 /* This is a load after a store, there is no conflict if the load reads
12638 from a cached area. Assume that loads from the stack, and from the
12639 constant pool are cached, and that others will miss. This is a
12640 hack. */
12641
12642 if ((SYMBOL_REF_P (src_mem)
12643 && CONSTANT_POOL_ADDRESS_P (src_mem))
12644 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12645 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12646 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12647 return 1;
12648 }
12649
12650 return cost;
12651 }
12652
12653 int
12654 arm_max_conditional_execute (void)
12655 {
12656 return max_insns_skipped;
12657 }
12658
12659 static int
12660 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12661 {
12662 if (TARGET_32BIT)
12663 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12664 else
12665 return (optimize > 0) ? 2 : 0;
12666 }
12667
12668 static int
12669 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12670 {
12671 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12672 }
12673
12674 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12675 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12676 sequences of non-executed instructions in IT blocks probably take the same
12677 amount of time as executed instructions (and the IT instruction itself takes
12678 space in icache). This function was experimentally determined to give good
12679 results on a popular embedded benchmark. */
12680
12681 static int
12682 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12683 {
12684 return (TARGET_32BIT && speed_p) ? 1
12685 : arm_default_branch_cost (speed_p, predictable_p);
12686 }
12687
12688 static int
12689 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12690 {
12691 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12692 }
12693
12694 static bool fp_consts_inited = false;
12695
12696 static REAL_VALUE_TYPE value_fp0;
12697
12698 static void
12699 init_fp_table (void)
12700 {
12701 REAL_VALUE_TYPE r;
12702
12703 r = REAL_VALUE_ATOF ("0", DFmode);
12704 value_fp0 = r;
12705 fp_consts_inited = true;
12706 }
12707
12708 /* Return TRUE if rtx X is a valid immediate FP constant. */
12709 int
12710 arm_const_double_rtx (rtx x)
12711 {
12712 const REAL_VALUE_TYPE *r;
12713
12714 if (!fp_consts_inited)
12715 init_fp_table ();
12716
12717 r = CONST_DOUBLE_REAL_VALUE (x);
12718 if (REAL_VALUE_MINUS_ZERO (*r))
12719 return 0;
12720
12721 if (real_equal (r, &value_fp0))
12722 return 1;
12723
12724 return 0;
12725 }
12726
12727 /* VFPv3 has a fairly wide range of representable immediates, formed from
12728 "quarter-precision" floating-point values. These can be evaluated using this
12729 formula (with ^ for exponentiation):
12730
12731 -1^s * n * 2^-r
12732
12733 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12734 16 <= n <= 31 and 0 <= r <= 7.
12735
12736 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12737
12738 - A (most-significant) is the sign bit.
12739 - BCD are the exponent (encoded as r XOR 3).
12740 - EFGH are the mantissa (encoded as n - 16).
12741 */
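/* Illustrative sketch (not part of arm.cc): decoding the ABCDEFGH byte back
   into its value, using the formula above.  For example 1.0 = 16 * 2^-4
   (s = 0, n = 16, r = 4) is encoded as 0 111 0000 = 0x70, and -31.0 =
   -1 * 31 * 2^0 (s = 1, n = 31, r = 0) as 1 011 1111 = 0xbf.  */
#if 0
static double
vfp3_decode_imm8_sketch (unsigned char imm8)
{
  int s = (imm8 >> 7) & 1;		/* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;		/* EFGH: mantissa, stored as n - 16.  */
  return (s ? -n : n) / (double) (1 << r);
}
#endif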
12742
12743 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12744 fconst[sd] instruction, or -1 if X isn't suitable. */
12745 static int
12746 vfp3_const_double_index (rtx x)
12747 {
12748 REAL_VALUE_TYPE r, m;
12749 int sign, exponent;
12750 unsigned HOST_WIDE_INT mantissa, mant_hi;
12751 unsigned HOST_WIDE_INT mask;
12752 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12753 bool fail;
12754
12755 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12756 return -1;
12757
12758 r = *CONST_DOUBLE_REAL_VALUE (x);
12759
12760 /* We can't represent these things, so detect them first. */
12761 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12762 return -1;
12763
12764 /* Extract sign, exponent and mantissa. */
12765 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12766 r = real_value_abs (&r);
12767 exponent = REAL_EXP (&r);
12768 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12769 highest (sign) bit, with a fixed binary point at bit point_pos.
12770 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12771 bits for the mantissa, this may fail (low bits would be lost). */
12772 real_ldexp (&m, &r, point_pos - exponent);
12773 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12774 mantissa = w.elt (0);
12775 mant_hi = w.elt (1);
12776
12777 /* If there are bits set in the low part of the mantissa, we can't
12778 represent this value. */
12779 if (mantissa != 0)
12780 return -1;
12781
12782 /* Now make it so that mantissa contains the most-significant bits, and move
12783 the point_pos to indicate that the least-significant bits have been
12784 discarded. */
12785 point_pos -= HOST_BITS_PER_WIDE_INT;
12786 mantissa = mant_hi;
12787
12788 /* We can permit four significant bits of mantissa only, plus a high bit
12789 which is always 1. */
12790 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12791 if ((mantissa & mask) != 0)
12792 return -1;
12793
12794 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12795 mantissa >>= point_pos - 5;
12796
12797 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12798 floating-point immediate zero with Neon using an integer-zero load, but
12799 that case is handled elsewhere.) */
12800 if (mantissa == 0)
12801 return -1;
12802
12803 gcc_assert (mantissa >= 16 && mantissa <= 31);
12804
12805 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12806 normalized significands are in the range [1, 2). (Our mantissa is shifted
12807 left 4 places at this point relative to normalized IEEE754 values). GCC
12808 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12809 REAL_EXP must be altered. */
12810 exponent = 5 - exponent;
12811
12812 if (exponent < 0 || exponent > 7)
12813 return -1;
12814
12815 /* Sign, mantissa and exponent are now in the correct form to plug into the
12816 formula described in the comment above. */
12817 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12818 }
12819
12820 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12821 int
12822 vfp3_const_double_rtx (rtx x)
12823 {
12824 if (!TARGET_VFP3)
12825 return 0;
12826
12827 return vfp3_const_double_index (x) != -1;
12828 }
12829
12830 /* Recognize immediates which can be used in various Neon and MVE instructions.
12831 Legal immediates are described by the following table (for VMVN variants, the
12832 bitwise inverse of the constant shown is recognized. In either case, VMOV
12833 is output and the correct instruction to use for a given constant is chosen
12834 by the assembler). The constant shown is replicated across all elements of
12835 the destination vector.
12836
12837 insn elems variant constant (binary)
12838 ---- ----- ------- -----------------
12839 vmov i32 0 00000000 00000000 00000000 abcdefgh
12840 vmov i32 1 00000000 00000000 abcdefgh 00000000
12841 vmov i32 2 00000000 abcdefgh 00000000 00000000
12842 vmov i32 3 abcdefgh 00000000 00000000 00000000
12843 vmov i16 4 00000000 abcdefgh
12844 vmov i16 5 abcdefgh 00000000
12845 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12846 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12847 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12848 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12849 vmvn i16 10 00000000 abcdefgh
12850 vmvn i16 11 abcdefgh 00000000
12851 vmov i32 12 00000000 00000000 abcdefgh 11111111
12852 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12853 vmov i32 14 00000000 abcdefgh 11111111 11111111
12854 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12855 vmov i8 16 abcdefgh
12856 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12857 eeeeeeee ffffffff gggggggg hhhhhhhh
12858 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12859 vmov f32 19 00000000 00000000 00000000 00000000
12860
12861 For case 18, B = !b. Representable values are exactly those accepted by
12862 vfp3_const_double_index, but are output as floating-point numbers rather
12863 than indices.
12864
12865 For case 19, we will change it to vmov.i32 when assembling.
12866
12867 Variants 0-5 (inclusive) may also be used as immediates for the second
12868 operand of VORR/VBIC instructions.
12869
12870 The INVERSE argument causes the bitwise inverse of the given operand to be
12871 recognized instead (used for recognizing legal immediates for the VAND/VORN
12872 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12873 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12874 output, rather than the real insns vbic/vorr).
12875
12876 INVERSE makes no difference to the recognition of float vectors.
12877
12878 The return value is the variant of immediate as shown in the above table, or
12879 -1 if the given value doesn't match any of the listed patterns.
12880 */
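/* Worked examples (illustrative): a V4SI constant with every element equal to
   0x000000ab splats to the byte pattern { ab 00 00 00 ... } and matches
   variant 0 (vmov.i32 with abcdefgh = 0xab); every element equal to
   0x0000ab00 matches variant 1; every element equal to 0xffffff54 (the
   bitwise inverse of 0x000000ab) matches VMVN variant 6.  */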
12881 static int
12882 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12883 rtx *modconst, int *elementwidth)
12884 {
12885 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12886 matches = 1; \
12887 for (i = 0; i < idx; i += (STRIDE)) \
12888 if (!(TEST)) \
12889 matches = 0; \
12890 if (matches) \
12891 { \
12892 immtype = (CLASS); \
12893 elsize = (ELSIZE); \
12894 break; \
12895 }
12896
12897 unsigned int i, elsize = 0, idx = 0, n_elts;
12898 unsigned int innersize;
12899 unsigned char bytes[16] = {};
12900 int immtype = -1, matches;
12901 unsigned int invmask = inverse ? 0xff : 0;
12902 bool vector = GET_CODE (op) == CONST_VECTOR;
12903
12904 if (vector)
12905 n_elts = CONST_VECTOR_NUNITS (op);
12906 else
12907 {
12908 n_elts = 1;
12909 gcc_assert (mode != VOIDmode);
12910 }
12911
12912 innersize = GET_MODE_UNIT_SIZE (mode);
12913
12914 /* Only support 128-bit vectors for MVE. */
12915 if (TARGET_HAVE_MVE
12916 && (!vector
12917 || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12918 || n_elts * innersize != 16))
12919 return -1;
12920
12921 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
12922 return -1;
12923
12924 /* Vectors of float constants. */
12925 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12926 {
12927 rtx el0 = CONST_VECTOR_ELT (op, 0);
12928
12929 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12930 return -1;
12931
12932 /* FP16 vectors cannot be represented. */
12933 if (GET_MODE_INNER (mode) == HFmode)
12934 return -1;
12935
12936 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12937 are distinct in this context. */
12938 if (!const_vec_duplicate_p (op))
12939 return -1;
12940
12941 if (modconst)
12942 *modconst = CONST_VECTOR_ELT (op, 0);
12943
12944 if (elementwidth)
12945 *elementwidth = 0;
12946
12947 if (el0 == CONST0_RTX (GET_MODE (el0)))
12948 return 19;
12949 else
12950 return 18;
12951 }
12952
12953 /* The tricks done in the code below apply for little-endian vector layout.
12954 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12955 FIXME: Implement logic for big-endian vectors. */
12956 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12957 return -1;
12958
12959 /* Splat vector constant out into a byte vector. */
12960 for (i = 0; i < n_elts; i++)
12961 {
12962 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12963 unsigned HOST_WIDE_INT elpart;
12964
12965 gcc_assert (CONST_INT_P (el));
12966 elpart = INTVAL (el);
12967
12968 for (unsigned int byte = 0; byte < innersize; byte++)
12969 {
12970 bytes[idx++] = (elpart & 0xff) ^ invmask;
12971 elpart >>= BITS_PER_UNIT;
12972 }
12973 }
12974
12975 /* Sanity check. */
12976 gcc_assert (idx == GET_MODE_SIZE (mode));
12977
12978 do
12979 {
12980 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12981 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12982
12983 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12984 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12985
12986 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12987 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12988
12989 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12990 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12991
12992 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12993
12994 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12995
12996 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12997 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12998
12999 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13000 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13001
13002 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13003 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13004
13005 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13006 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13007
13008 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13009
13010 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13011
13012 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13013 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13014
13015 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13016 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13017
13018 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13019 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13020
13021 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13022 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13023
13024 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13025
13026 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13027 && bytes[i] == bytes[(i + 8) % idx]);
13028 }
13029 while (0);
13030
13031 if (immtype == -1)
13032 return -1;
13033
13034 if (elementwidth)
13035 *elementwidth = elsize;
13036
13037 if (modconst)
13038 {
13039 unsigned HOST_WIDE_INT imm = 0;
13040
13041 /* Un-invert bytes of recognized vector, if necessary. */
13042 if (invmask != 0)
13043 for (i = 0; i < idx; i++)
13044 bytes[i] ^= invmask;
13045
13046 if (immtype == 17)
13047 {
13048 /* FIXME: Broken on 32-bit H_W_I hosts. */
13049 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13050
13051 for (i = 0; i < 8; i++)
13052 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13053 << (i * BITS_PER_UNIT);
13054
13055 *modconst = GEN_INT (imm);
13056 }
13057 else
13058 {
13059 unsigned HOST_WIDE_INT imm = 0;
13060
13061 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13062 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13063
13064 *modconst = GEN_INT (imm);
13065 }
13066 }
13067
13068 return immtype;
13069 #undef CHECK
13070 }
13071
13072 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13073 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13074 (or zero for float elements), and a modified constant (whatever should be
13075 output for a VMOV) in *MODCONST. This function was renamed from
13076 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13077 it is used by both Neon and MVE. */
13078 int
13079 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13080 rtx *modconst, int *elementwidth)
13081 {
13082 rtx tmpconst;
13083 int tmpwidth;
13084 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13085
13086 if (retval == -1)
13087 return 0;
13088
13089 if (modconst)
13090 *modconst = tmpconst;
13091
13092 if (elementwidth)
13093 *elementwidth = tmpwidth;
13094
13095 return 1;
13096 }
13097
13098 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13099 the immediate is valid, write a constant suitable for using as an operand
13100 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13101 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13102
13103 int
13104 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13105 rtx *modconst, int *elementwidth)
13106 {
13107 rtx tmpconst;
13108 int tmpwidth;
13109 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13110
13111 if (retval < 0 || retval > 5)
13112 return 0;
13113
13114 if (modconst)
13115 *modconst = tmpconst;
13116
13117 if (elementwidth)
13118 *elementwidth = tmpwidth;
13119
13120 return 1;
13121 }
13122
13123 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13124 the immediate is valid, write a constant suitable for using as an operand
13125 to VSHR/VSHL to *MODCONST and the corresponding element width to
13126 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
13127 which have different immediate limits. */
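/* For example, with 16-bit elements a VSHL immediate must be in the range
   0-15 while a VSHR immediate must be in the range 1-16; the checks below
   enforce exactly that.  */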
13128
13129 int
13130 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13131 rtx *modconst, int *elementwidth,
13132 bool isleftshift)
13133 {
13134 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13135 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13136 unsigned HOST_WIDE_INT last_elt = 0;
13137 unsigned HOST_WIDE_INT maxshift;
13138
13139 /* All elements of the vector constant must be identical. */
13140 for (i = 0; i < n_elts; i++)
13141 {
13142 rtx el = CONST_VECTOR_ELT (op, i);
13143 unsigned HOST_WIDE_INT elpart;
13144
13145 if (CONST_INT_P (el))
13146 elpart = INTVAL (el);
13147 else if (CONST_DOUBLE_P (el))
13148 return 0;
13149 else
13150 gcc_unreachable ();
13151
13152 if (i != 0 && elpart != last_elt)
13153 return 0;
13154
13155 last_elt = elpart;
13156 }
13157
13158 /* The permitted shift immediate is bounded by the element size. */
13159 maxshift = innersize * 8;
13160
13161 if (isleftshift)
13162 {
13163 /* Left shift immediate value can be from 0 to <size>-1. */
13164 if (last_elt >= maxshift)
13165 return 0;
13166 }
13167 else
13168 {
13169 /* Right shift immediate value can be from 1 to <size>. */
13170 if (last_elt == 0 || last_elt > maxshift)
13171 return 0;
13172 }
13173
13174 if (elementwidth)
13175 *elementwidth = innersize * 8;
13176
13177 if (modconst)
13178 *modconst = CONST_VECTOR_ELT (op, 0);
13179
13180 return 1;
13181 }
13182
13183 /* Return a string suitable for output of Neon immediate logic operation
13184 MNEM. */
13185
13186 char *
13187 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13188 int inverse, int quad)
13189 {
13190 int width, is_valid;
13191 static char templ[40];
13192
13193 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13194
13195 gcc_assert (is_valid != 0);
13196
13197 if (quad)
13198 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13199 else
13200 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13201
13202 return templ;
13203 }
13204
13205 /* Return a string suitable for output of Neon immediate shift operation
13206 (VSHR or VSHL) MNEM. */
13207
13208 char *
13209 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13210 machine_mode mode, int quad,
13211 bool isleftshift)
13212 {
13213 int width, is_valid;
13214 static char templ[40];
13215
13216 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13217 gcc_assert (is_valid != 0);
13218
13219 if (quad)
13220 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13221 else
13222 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13223
13224 return templ;
13225 }
13226
13227 /* Output a sequence of pairwise operations to implement a reduction.
13228 NOTE: We do "too much work" here, because pairwise operations work on two
13229 registers-worth of operands in one go. Unfortunately it does not seem
13230 possible to exploit those extra calculations to do the full operation in fewer steps.
13231 Although all vector elements of the result but the first are ignored, we
13232 actually calculate the same result in each of the elements. An alternative
13233 such as initially loading a vector with zero to use as each of the second
13234 operands would use up an additional register and take an extra instruction,
13235 for no particular gain. */
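/* Worked example (illustrative): summing a V4HI vector { a, b, c, d } takes
   two pairwise steps.  With both operands equal to the running value, the
   first VPADD produces { a+b, c+d, a+b, c+d } and the second produces
   { a+b+c+d, ... } in every element, of which only element 0 is used.  */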
13236
13237 void
13238 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13239 rtx (*reduc) (rtx, rtx, rtx))
13240 {
13241 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13242 rtx tmpsum = op1;
13243
13244 for (i = parts / 2; i >= 1; i /= 2)
13245 {
13246 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13247 emit_insn (reduc (dest, tmpsum, tmpsum));
13248 tmpsum = dest;
13249 }
13250 }
13251
13252 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13253 loaded into a register using VDUP.
13254
13255 If this is the case, and GENERATE is set, we also generate
13256 instructions to do this and return an RTX to assign to the register. */
13257
13258 static rtx
13259 neon_vdup_constant (rtx vals, bool generate)
13260 {
13261 machine_mode mode = GET_MODE (vals);
13262 machine_mode inner_mode = GET_MODE_INNER (mode);
13263 rtx x;
13264
13265 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13266 return NULL_RTX;
13267
13268 if (!const_vec_duplicate_p (vals, &x))
13269 /* The elements are not all the same. We could handle repeating
13270 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13271 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13272 vdup.i16). */
13273 return NULL_RTX;
13274
13275 if (!generate)
13276 return x;
13277
13278 /* We can load this constant by using VDUP and a constant in a
13279 single ARM register. This will be cheaper than a vector
13280 load. */
13281
13282 x = copy_to_mode_reg (inner_mode, x);
13283 return gen_vec_duplicate (mode, x);
13284 }
13285
13286 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13287 rtx
13288 mve_bool_vec_to_const (rtx const_vec)
13289 {
13290 int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec));
13291 int repeat = 16 / n_elts;
13292 int i;
13293 int hi_val = 0;
13294
13295 for (i = 0; i < n_elts; i++)
13296 {
13297 rtx el = CONST_VECTOR_ELT (const_vec, i);
13298 unsigned HOST_WIDE_INT elpart;
13299
13300 gcc_assert (CONST_INT_P (el));
13301 elpart = INTVAL (el);
13302
13303 for (int j = 0; j < repeat; j++)
13304 hi_val |= elpart << (i * repeat + j);
13305 }
13306 return gen_int_mode (hi_val, HImode);
13307 }
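/* Illustrative sketch (not part of arm.cc): the same packing for a
   hypothetical 4-element predicate.  Each element is widened to 16/4 = 4
   copies of its low bit, so { 1, 0, 1, 1 } yields 0xff0f.  */
#if 0
static unsigned int
mve_pack_bool4_sketch (const int elts[4])
{
  unsigned int hi_val = 0;
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)	/* repeat = 16 / n_elts.  */
      hi_val |= (unsigned int) (elts[i] & 1) << (i * 4 + j);
  return hi_val;
}
#endif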
13308
13309 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13310 constants (for vec_init) or a CONST_VECTOR, can be efficiently loaded
13311 into a register.
13312
13313 If this is the case, and GENERATE is set, we also generate code to do
13314 this and return an RTX to copy into the register. */
13315
13316 rtx
13317 neon_make_constant (rtx vals, bool generate)
13318 {
13319 machine_mode mode = GET_MODE (vals);
13320 rtx target;
13321 rtx const_vec = NULL_RTX;
13322 int n_elts = GET_MODE_NUNITS (mode);
13323 int n_const = 0;
13324 int i;
13325
13326 if (GET_CODE (vals) == CONST_VECTOR)
13327 const_vec = vals;
13328 else if (GET_CODE (vals) == PARALLEL)
13329 {
13330 /* A CONST_VECTOR must contain only CONST_INTs and
13331 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13332 Only store valid constants in a CONST_VECTOR. */
13333 for (i = 0; i < n_elts; ++i)
13334 {
13335 rtx x = XVECEXP (vals, 0, i);
13336 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13337 n_const++;
13338 }
13339 if (n_const == n_elts)
13340 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13341 }
13342 else
13343 gcc_unreachable ();
13344
13345 if (const_vec != NULL
13346 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13347 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13348 return const_vec;
13349 else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL))
13350 return mve_bool_vec_to_const (const_vec);
13351 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13352 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13353 pipeline cycle; creating the constant takes one or two ARM
13354 pipeline cycles. */
13355 return target;
13356 else if (const_vec != NULL_RTX)
13357 /* Load from constant pool. On Cortex-A8 this takes two cycles
13358 (for either double or quad vectors). We cannot take advantage
13359 of single-cycle VLD1 because we need a PC-relative addressing
13360 mode. */
13361 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13362 else
13363 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13364 We cannot construct an initializer. */
13365 return NULL_RTX;
13366 }
13367
13368 /* Initialize vector TARGET to VALS. */
13369
13370 void
13371 neon_expand_vector_init (rtx target, rtx vals)
13372 {
13373 machine_mode mode = GET_MODE (target);
13374 machine_mode inner_mode = GET_MODE_INNER (mode);
13375 int n_elts = GET_MODE_NUNITS (mode);
13376 int n_var = 0, one_var = -1;
13377 bool all_same = true;
13378 rtx x, mem;
13379 int i;
13380
13381 for (i = 0; i < n_elts; ++i)
13382 {
13383 x = XVECEXP (vals, 0, i);
13384 if (!CONSTANT_P (x))
13385 ++n_var, one_var = i;
13386
13387 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13388 all_same = false;
13389 }
13390
13391 if (n_var == 0)
13392 {
13393 rtx constant = neon_make_constant (vals);
13394 if (constant != NULL_RTX)
13395 {
13396 emit_move_insn (target, constant);
13397 return;
13398 }
13399 }
13400
13401 /* Splat a single non-constant element if we can. */
13402 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13403 {
13404 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13405 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13406 return;
13407 }
13408
13409 /* One field is non-constant. Load constant then overwrite varying
13410 field. This is more efficient than using the stack. */
13411 if (n_var == 1)
13412 {
13413 rtx copy = copy_rtx (vals);
13414 rtx merge_mask = GEN_INT (1 << one_var);
13415
13416 /* Load constant part of vector, substitute neighboring value for
13417 varying element. */
13418 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13419 neon_expand_vector_init (target, copy);
13420
13421 /* Insert variable. */
13422 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13423 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13424 return;
13425 }
13426
13427 /* Construct the vector in memory one field at a time
13428 and load the whole vector. */
13429 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13430 for (i = 0; i < n_elts; i++)
13431 emit_move_insn (adjust_address_nv (mem, inner_mode,
13432 i * GET_MODE_SIZE (inner_mode)),
13433 XVECEXP (vals, 0, i));
13434 emit_move_insn (target, mem);
13435 }
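/* Worked example (illustrative): initializing a 4-element vector to
   { x, 1, 2, 3 } where only x is non-constant takes the n_var == 1 path:
   the routine first loads the constant vector { 1, 1, 2, 3 } (element 0
   borrowed from its neighbour) and then inserts x into lane 0 using a
   merge mask of 1 << 0.  */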
13436
13437 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13438 an error mentioning DESC if it doesn't. EXP indicates the source location, which includes the
13439 inlining history for intrinsics. */
13440
13441 static void
13442 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13443 const_tree exp, const char *desc)
13444 {
13445 HOST_WIDE_INT lane;
13446
13447 gcc_assert (CONST_INT_P (operand));
13448
13449 lane = INTVAL (operand);
13450
13451 if (lane < low || lane >= high)
13452 {
13453 if (exp)
13454 error_at (EXPR_LOCATION (exp),
13455 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13456 else
13457 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13458 }
13459 }
13460
13461 /* Bounds-check lanes. */
13462
13463 void
13464 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13465 const_tree exp)
13466 {
13467 bounds_check (operand, low, high, exp, "lane");
13468 }
13469
13470 /* Bounds-check constants. */
13471
13472 void
13473 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13474 {
13475 bounds_check (operand, low, high, NULL_TREE, "constant");
13476 }
13477
13478 HOST_WIDE_INT
13479 neon_element_bits (machine_mode mode)
13480 {
13481 return GET_MODE_UNIT_BITSIZE (mode);
13482 }
13483
13484 \f
13485 /* Predicates for `match_operand' and `match_operator'. */
13486
13487 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13488 WB level is 2 if full writeback address modes are allowed, 1
13489 if limited writeback address modes (POST_INC and PRE_DEC) are
13490 allowed and 0 if no writeback at all is supported. */
13491
13492 int
13493 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13494 {
13495 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13496 rtx ind;
13497
13498 /* Reject eliminable registers. */
13499 if (! (reload_in_progress || reload_completed || lra_in_progress)
13500 && ( reg_mentioned_p (frame_pointer_rtx, op)
13501 || reg_mentioned_p (arg_pointer_rtx, op)
13502 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13503 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13504 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13505 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13506 return FALSE;
13507
13508 /* Constants are converted into offsets from labels. */
13509 if (!MEM_P (op))
13510 return FALSE;
13511
13512 ind = XEXP (op, 0);
13513
13514 if (reload_completed
13515 && (LABEL_REF_P (ind)
13516 || (GET_CODE (ind) == CONST
13517 && GET_CODE (XEXP (ind, 0)) == PLUS
13518 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13519 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13520 return TRUE;
13521
13522 /* Match: (mem (reg)). */
13523 if (REG_P (ind))
13524 return arm_address_register_rtx_p (ind, 0);
13525
13526 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13527 acceptable whenever at least restricted writeback is allowed
13528 (subject to verification by arm_address_register_rtx_p).
13529 PRE_INC and POST_DEC additionally require full writeback
13530 to be accepted. */
13531 if (wb_level > 0
13532 && (GET_CODE (ind) == POST_INC
13533 || GET_CODE (ind) == PRE_DEC
13534 || (wb_level > 1
13535 && (GET_CODE (ind) == PRE_INC
13536 || GET_CODE (ind) == POST_DEC))))
13537 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13538
13539 if (wb_level > 1
13540 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13541 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13542 && GET_CODE (XEXP (ind, 1)) == PLUS
13543 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13544 ind = XEXP (ind, 1);
13545
13546 /* Match:
13547 (plus (reg)
13548 (const))
13549
13550 The encoded immediate for 16-bit modes is multiplied by 2,
13551 while the encoded immediate for 32-bit and 64-bit modes is
13552 multiplied by 4. */
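/* For example, a 32-bit or 64-bit access has factor 4, so the offset must be
   a multiple of 4 in the range [-1020, 1020]; a 16-bit access has factor 2,
   giving multiples of 2 in [-510, 510].  */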
13553 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13554 if (GET_CODE (ind) == PLUS
13555 && REG_P (XEXP (ind, 0))
13556 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13557 && CONST_INT_P (XEXP (ind, 1))
13558 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13559 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13560 return TRUE;
13561
13562 return FALSE;
13563 }
13564
13565 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13566 WB is true if full writeback address modes are allowed and is false
13567 if limited writeback address modes (POST_INC and PRE_DEC) are
13568 allowed. */
13569
13570 int arm_coproc_mem_operand (rtx op, bool wb)
13571 {
13572 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13573 }
13574
13575 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13576 context in which no writeback address modes are allowed. */
13577
13578 int
13579 arm_coproc_mem_operand_no_writeback (rtx op)
13580 {
13581 return arm_coproc_mem_operand_wb (op, 0);
13582 }
13583
13584 /* Return TRUE if OP is a valid MVE vector memory operand for MODE.
13585 1. For the modes V8QI, V4QI and V4HI, check for [Rn]; return TRUE for Rn <= LO_REGS.
13586 2. For other modes, check for [Rn]; return TRUE for Rn < R15 (except R13). */
13587 int
13588 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13589 {
13590 enum rtx_code code;
13591 int val, reg_no;
13592
13593 /* Match: (mem (reg)). */
13594 if (REG_P (op))
13595 {
13596 int reg_no = REGNO (op);
13597 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13598 ? reg_no <= LAST_LO_REGNUM
13599 : reg_no < LAST_ARM_REGNUM)
13600 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13601 }
13602 code = GET_CODE (op);
13603
13604 if (code == POST_INC || code == PRE_DEC
13605 || code == PRE_INC || code == POST_DEC)
13606 {
13607 reg_no = REGNO (XEXP (op, 0));
13608 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13609 ? reg_no <= LAST_LO_REGNUM
13610 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13611 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13612 }
13613 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13614 && GET_CODE (XEXP (op, 1)) == PLUS
13615 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13616 && REG_P (XEXP (op, 0))
13617 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13618 /* Make sure to only accept PLUS after reload_completed, otherwise
13619 this will interfere with auto_inc's pattern detection. */
13620 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13621 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13622 {
13623 reg_no = REGNO (XEXP (op, 0));
13624 if (code == PLUS)
13625 val = INTVAL (XEXP (op, 1));
13626 else
13627 val = INTVAL (XEXP(XEXP (op, 1), 1));
13628
13629 switch (mode)
13630 {
13631 case E_V16QImode:
13632 case E_V8QImode:
13633 case E_V4QImode:
13634 if (abs (val) > 127)
13635 return FALSE;
13636 break;
13637 case E_V8HImode:
13638 case E_V8HFmode:
13639 case E_V4HImode:
13640 case E_V4HFmode:
13641 if (val % 2 != 0 || abs (val) > 254)
13642 return FALSE;
13643 break;
13644 case E_V4SImode:
13645 case E_V4SFmode:
13646 if (val % 4 != 0 || abs (val) > 508)
13647 return FALSE;
13648 break;
13649 default:
13650 return FALSE;
13651 }
13652 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13653 || (MVE_STN_LDW_MODE (mode)
13654 ? reg_no <= LAST_LO_REGNUM
13655 : (reg_no < LAST_ARM_REGNUM
13656 && (code == PLUS || reg_no != SP_REGNUM))));
13657 }
13658 return FALSE;
13659 }
13660
13661 /* Return TRUE if OP is a memory operand which we can load or store a vector
13662 to/from. TYPE is one of the following values:
13663 0 - Vector load/store (vldr)
13664 1 - Core registers (ldm)
13665 2 - Element/structure loads (vld1)
13666 */
13667 int
13668 neon_vector_mem_operand (rtx op, int type, bool strict)
13669 {
13670 rtx ind;
13671
13672 /* Reject eliminable registers. */
13673 if (strict && ! (reload_in_progress || reload_completed)
13674 && (reg_mentioned_p (frame_pointer_rtx, op)
13675 || reg_mentioned_p (arg_pointer_rtx, op)
13676 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13677 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13678 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13679 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13680 return FALSE;
13681
13682 /* Constants are converted into offsets from labels. */
13683 if (!MEM_P (op))
13684 return FALSE;
13685
13686 ind = XEXP (op, 0);
13687
13688 if (reload_completed
13689 && (LABEL_REF_P (ind)
13690 || (GET_CODE (ind) == CONST
13691 && GET_CODE (XEXP (ind, 0)) == PLUS
13692 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13693 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13694 return TRUE;
13695
13696 /* Match: (mem (reg)). */
13697 if (REG_P (ind))
13698 return arm_address_register_rtx_p (ind, 0);
13699
13700 /* Allow post-increment with Neon registers. */
13701 if ((type != 1 && GET_CODE (ind) == POST_INC)
13702 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13703 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13704
13705 /* Allow post-increment by register for VLDn */
13706 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13707 && GET_CODE (XEXP (ind, 1)) == PLUS
13708 && REG_P (XEXP (XEXP (ind, 1), 1))
13709 && REG_P (XEXP (ind, 0))
13710 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13711 return true;
13712
13713 /* Match:
13714 (plus (reg)
13715 (const)). */
13716 if (type == 0
13717 && GET_CODE (ind) == PLUS
13718 && REG_P (XEXP (ind, 0))
13719 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13720 && CONST_INT_P (XEXP (ind, 1))
13721 && INTVAL (XEXP (ind, 1)) > -1024
13722 /* For quad modes, we restrict the constant offset to be slightly less
13723 than what the instruction format permits. We have no such constraint
13724 on double mode offsets. (This must match arm_legitimate_index_p.) */
13725 && (INTVAL (XEXP (ind, 1))
13726 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13727 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13728 return TRUE;
13729
13730 return FALSE;
13731 }
13732
13733 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13734 type. */
13735 int
13736 neon_struct_mem_operand (rtx op)
13737 {
13738 rtx ind;
13739
13740 /* Reject eliminable registers. */
13741 if (! (reload_in_progress || reload_completed)
13742 && ( reg_mentioned_p (frame_pointer_rtx, op)
13743 || reg_mentioned_p (arg_pointer_rtx, op)
13744 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13745 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13746 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13747 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13748 return FALSE;
13749
13750 /* Constants are converted into offsets from labels. */
13751 if (!MEM_P (op))
13752 return FALSE;
13753
13754 ind = XEXP (op, 0);
13755
13756 if (reload_completed
13757 && (LABEL_REF_P (ind)
13758 || (GET_CODE (ind) == CONST
13759 && GET_CODE (XEXP (ind, 0)) == PLUS
13760 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13761 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13762 return TRUE;
13763
13764 /* Match: (mem (reg)). */
13765 if (REG_P (ind))
13766 return arm_address_register_rtx_p (ind, 0);
13767
13768 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13769 if (GET_CODE (ind) == POST_INC
13770 || GET_CODE (ind) == PRE_DEC)
13771 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13772
13773 return FALSE;
13774 }
13775
13776 /* Prepares the operands for the VCMLA by lane instruction such that the right
13777 register number is selected. This instruction is special in that it always
13778 requires a D register, however there is a choice to be made between Dn[0],
13779 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13780
13781 The VCMLA by lane function always selects two values. For instance given D0
13782 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13783 used by the instruction. However, given V4SF, indices 0 and 1 are both valid,
13784 selecting D0[0] or D1[0] respectively.
13785
13786 This function centralizes that information based on OPERANDS: OPERANDS[3]
13787 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13788 updated to contain the right index. */
13789
13790 rtx *
13791 neon_vcmla_lane_prepare_operands (rtx *operands)
13792 {
13793 int lane = INTVAL (operands[4]);
13794 machine_mode constmode = SImode;
13795 machine_mode mode = GET_MODE (operands[3]);
13796 int regno = REGNO (operands[3]);
13797 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13798 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13799 {
13800 operands[3] = gen_int_mode (regno + 1, constmode);
13801 operands[4]
13802 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13803 }
13804 else
13805 {
13806 operands[3] = gen_int_mode (regno, constmode);
13807 operands[4] = gen_int_mode (lane, constmode);
13808 }
13809 return operands;
13810 }
13811
13812
13813 /* Return true if X is a register that will be eliminated later on. */
13814 int
13815 arm_eliminable_register (rtx x)
13816 {
13817 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13818 || REGNO (x) == ARG_POINTER_REGNUM
13819 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13820 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13821 }
13822
13823 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13824 coprocessor registers. Otherwise return NO_REGS. */
13825
13826 enum reg_class
13827 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13828 {
13829 if (mode == HFmode)
13830 {
13831 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13832 return GENERAL_REGS;
13833 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13834 return NO_REGS;
13835 return GENERAL_REGS;
13836 }
13837
13838 /* The neon move patterns handle all legitimate vector and struct
13839 addresses. */
13840 if (TARGET_NEON
13841 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13842 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13843 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13844 || VALID_NEON_STRUCT_MODE (mode)))
13845 return NO_REGS;
13846
13847 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13848 return NO_REGS;
13849
13850 return GENERAL_REGS;
13851 }
13852
13853 /* Values which must be returned in the most-significant end of the return
13854 register. */
13855
13856 static bool
13857 arm_return_in_msb (const_tree valtype)
13858 {
13859 return (TARGET_AAPCS_BASED
13860 && BYTES_BIG_ENDIAN
13861 && (AGGREGATE_TYPE_P (valtype)
13862 || TREE_CODE (valtype) == COMPLEX_TYPE
13863 || FIXED_POINT_TYPE_P (valtype)));
13864 }
13865
13866 /* Return TRUE if X references a SYMBOL_REF. */
13867 int
13868 symbol_mentioned_p (rtx x)
13869 {
13870 const char * fmt;
13871 int i;
13872
13873 if (SYMBOL_REF_P (x))
13874 return 1;
13875
13876 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13877 are constant offsets, not symbols. */
13878 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13879 return 0;
13880
13881 fmt = GET_RTX_FORMAT (GET_CODE (x));
13882
13883 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13884 {
13885 if (fmt[i] == 'E')
13886 {
13887 int j;
13888
13889 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13890 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13891 return 1;
13892 }
13893 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13894 return 1;
13895 }
13896
13897 return 0;
13898 }
13899
13900 /* Return TRUE if X references a LABEL_REF. */
13901 int
13902 label_mentioned_p (rtx x)
13903 {
13904 const char * fmt;
13905 int i;
13906
13907 if (LABEL_REF_P (x))
13908 return 1;
13909
13910 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13911 instruction, but they are constant offsets, not symbols. */
13912 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13913 return 0;
13914
13915 fmt = GET_RTX_FORMAT (GET_CODE (x));
13916 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13917 {
13918 if (fmt[i] == 'E')
13919 {
13920 int j;
13921
13922 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13923 if (label_mentioned_p (XVECEXP (x, i, j)))
13924 return 1;
13925 }
13926 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13927 return 1;
13928 }
13929
13930 return 0;
13931 }
13932
13933 int
13934 tls_mentioned_p (rtx x)
13935 {
13936 switch (GET_CODE (x))
13937 {
13938 case CONST:
13939 return tls_mentioned_p (XEXP (x, 0));
13940
13941 case UNSPEC:
13942 if (XINT (x, 1) == UNSPEC_TLS)
13943 return 1;
13944
13945 /* Fall through. */
13946 default:
13947 return 0;
13948 }
13949 }
13950
13951 /* Must not copy any rtx that uses a pc-relative address.
13952 Also, disallow copying of load-exclusive instructions that
13953 may appear after splitting of compare-and-swap-style operations
13954 so as to prevent those loops from being transformed away from their
13955 canonical forms (see PR 69904). */
13956
13957 static bool
13958 arm_cannot_copy_insn_p (rtx_insn *insn)
13959 {
13960 /* The tls call insn cannot be copied, as it is paired with a data
13961 word. */
13962 if (recog_memoized (insn) == CODE_FOR_tlscall)
13963 return true;
13964
13965 subrtx_iterator::array_type array;
13966 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13967 {
13968 const_rtx x = *iter;
13969 if (GET_CODE (x) == UNSPEC
13970 && (XINT (x, 1) == UNSPEC_PIC_BASE
13971 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13972 return true;
13973 }
13974
13975 rtx set = single_set (insn);
13976 if (set)
13977 {
13978 rtx src = SET_SRC (set);
13979 if (GET_CODE (src) == ZERO_EXTEND)
13980 src = XEXP (src, 0);
13981
13982 /* Catch the load-exclusive and load-acquire operations. */
13983 if (GET_CODE (src) == UNSPEC_VOLATILE
13984 && (XINT (src, 1) == VUNSPEC_LL
13985 || XINT (src, 1) == VUNSPEC_LAX))
13986 return true;
13987 }
13988 return false;
13989 }
13990
13991 enum rtx_code
13992 minmax_code (rtx x)
13993 {
13994 enum rtx_code code = GET_CODE (x);
13995
13996 switch (code)
13997 {
13998 case SMAX:
13999 return GE;
14000 case SMIN:
14001 return LE;
14002 case UMIN:
14003 return LEU;
14004 case UMAX:
14005 return GEU;
14006 default:
14007 gcc_unreachable ();
14008 }
14009 }
14010
14011 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14012
14013 bool
14014 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14015 int *mask, bool *signed_sat)
14016 {
14017 /* The high bound must be a power of two minus one. */
14018 int log = exact_log2 (INTVAL (hi_bound) + 1);
14019 if (log == -1)
14020 return false;
14021
14022 /* The low bound is either zero (for usat) or one less than the
14023 negation of the high bound (for ssat). */
14024 if (INTVAL (lo_bound) == 0)
14025 {
14026 if (mask)
14027 *mask = log;
14028 if (signed_sat)
14029 *signed_sat = false;
14030
14031 return true;
14032 }
14033
14034 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14035 {
14036 if (mask)
14037 *mask = log + 1;
14038 if (signed_sat)
14039 *signed_sat = true;
14040
14041 return true;
14042 }
14043
14044 return false;
14045 }
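/* Worked examples (illustrative): bounds (0, 255) satisfy the first case and
   correspond to USAT with *MASK = 8; bounds (-128, 127) satisfy the second
   case, since -128 == -127 - 1, and correspond to SSAT with *MASK = 8.  */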
14046
14047 /* Return 1 if memory locations are adjacent. */
14048 int
14049 adjacent_mem_locations (rtx a, rtx b)
14050 {
14051 /* We don't guarantee to preserve the order of these memory refs. */
14052 if (volatile_refs_p (a) || volatile_refs_p (b))
14053 return 0;
14054
14055 if ((REG_P (XEXP (a, 0))
14056 || (GET_CODE (XEXP (a, 0)) == PLUS
14057 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14058 && (REG_P (XEXP (b, 0))
14059 || (GET_CODE (XEXP (b, 0)) == PLUS
14060 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14061 {
14062 HOST_WIDE_INT val0 = 0, val1 = 0;
14063 rtx reg0, reg1;
14064 int val_diff;
14065
14066 if (GET_CODE (XEXP (a, 0)) == PLUS)
14067 {
14068 reg0 = XEXP (XEXP (a, 0), 0);
14069 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14070 }
14071 else
14072 reg0 = XEXP (a, 0);
14073
14074 if (GET_CODE (XEXP (b, 0)) == PLUS)
14075 {
14076 reg1 = XEXP (XEXP (b, 0), 0);
14077 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14078 }
14079 else
14080 reg1 = XEXP (b, 0);
14081
14082 /* Don't accept any offset that will require multiple
14083 instructions to handle, since this would cause the
14084 arith_adjacentmem pattern to output an overlong sequence. */
14085 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14086 return 0;
14087
14088 /* Don't allow an eliminable register: register elimination can make
14089 the offset too large. */
14090 if (arm_eliminable_register (reg0))
14091 return 0;
14092
14093 val_diff = val1 - val0;
14094
14095 if (arm_ld_sched)
14096 {
14097 /* If the target has load delay slots, then there's no benefit
14098 to using an ldm instruction unless the offset is zero and
14099 we are optimizing for size. */
14100 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14101 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14102 && (val_diff == 4 || val_diff == -4));
14103 }
14104
14105 return ((REGNO (reg0) == REGNO (reg1))
14106 && (val_diff == 4 || val_diff == -4));
14107 }
14108
14109 return 0;
14110 }
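/* For example, (mem (plus r4 8)) and (mem (plus r4 12)) share a base register
   and differ by 4 bytes, so on a target without load delay slots they are
   considered adjacent and are a candidate for LDM/STM.  */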
14111
14112 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14113 for load operations, false for store operations. CONSECUTIVE is true
14114 if the register numbers in the operation must be consecutive in the register
14115 bank. RETURN_PC is true if the value is to be loaded into the PC.
14116 The pattern we are trying to match for load is:
14117 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14118 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14119 :
14120 :
14121 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14122 ]
14123 where
14124 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14125 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14126 3. If consecutive is TRUE, then for kth register being loaded,
14127 REGNO (R_dk) = REGNO (R_d0) + k.
14128 The pattern for store is similar. */
14129 bool
14130 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14131 bool consecutive, bool return_pc)
14132 {
14133 HOST_WIDE_INT count = XVECLEN (op, 0);
14134 rtx reg, mem, addr;
14135 unsigned regno;
14136 unsigned first_regno;
14137 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14138 rtx elt;
14139 bool addr_reg_in_reglist = false;
14140 bool update = false;
14141 int reg_increment;
14142 int offset_adj;
14143 int regs_per_val;
14144
14145 /* If not in SImode, then registers must be consecutive
14146 (e.g., VLDM instructions for DFmode). */
14147 gcc_assert ((mode == SImode) || consecutive);
14148 /* Setting return_pc for stores is illegal. */
14149 gcc_assert (!return_pc || load);
14150
14151 /* Set up the increments and the regs per val based on the mode. */
14152 reg_increment = GET_MODE_SIZE (mode);
14153 regs_per_val = reg_increment / 4;
14154 offset_adj = return_pc ? 1 : 0;
14155
14156 if (count <= 1
14157 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14158 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14159 return false;
14160
14161 /* Check if this is a write-back. */
14162 elt = XVECEXP (op, 0, offset_adj);
14163 if (GET_CODE (SET_SRC (elt)) == PLUS)
14164 {
14165 i++;
14166 base = 1;
14167 update = true;
14168
14169 /* The offset adjustment must be the number of registers being
14170 popped times the size of a single register. */
14171 if (!REG_P (SET_DEST (elt))
14172 || !REG_P (XEXP (SET_SRC (elt), 0))
14173 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14174 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14175 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14176 ((count - 1 - offset_adj) * reg_increment))
14177 return false;
14178 }
14179
14180 i = i + offset_adj;
14181 base = base + offset_adj;
14182 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14183 success depends on the type: VLDM can do just one reg,
14184 LDM must do at least two. */
14185 if ((count <= i) && (mode == SImode))
14186 return false;
14187
14188 elt = XVECEXP (op, 0, i - 1);
14189 if (GET_CODE (elt) != SET)
14190 return false;
14191
14192 if (load)
14193 {
14194 reg = SET_DEST (elt);
14195 mem = SET_SRC (elt);
14196 }
14197 else
14198 {
14199 reg = SET_SRC (elt);
14200 mem = SET_DEST (elt);
14201 }
14202
14203 if (!REG_P (reg) || !MEM_P (mem))
14204 return false;
14205
14206 regno = REGNO (reg);
14207 first_regno = regno;
14208 addr = XEXP (mem, 0);
14209 if (GET_CODE (addr) == PLUS)
14210 {
14211 if (!CONST_INT_P (XEXP (addr, 1)))
14212 return false;
14213
14214 offset = INTVAL (XEXP (addr, 1));
14215 addr = XEXP (addr, 0);
14216 }
14217
14218 if (!REG_P (addr))
14219 return false;
14220
14221 /* Don't allow SP to be loaded unless it is also the base register. It
14222 guarantees that SP is reset correctly when an LDM instruction
14223 is interrupted. Otherwise, we might end up with a corrupt stack. */
14224 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14225 return false;
14226
14227 if (regno == REGNO (addr))
14228 addr_reg_in_reglist = true;
14229
14230 for (; i < count; i++)
14231 {
14232 elt = XVECEXP (op, 0, i);
14233 if (GET_CODE (elt) != SET)
14234 return false;
14235
14236 if (load)
14237 {
14238 reg = SET_DEST (elt);
14239 mem = SET_SRC (elt);
14240 }
14241 else
14242 {
14243 reg = SET_SRC (elt);
14244 mem = SET_DEST (elt);
14245 }
14246
14247 if (!REG_P (reg)
14248 || GET_MODE (reg) != mode
14249 || REGNO (reg) <= regno
14250 || (consecutive
14251 && (REGNO (reg) !=
14252 (unsigned int) (first_regno + regs_per_val * (i - base))))
14253 /* Don't allow SP to be loaded unless it is also the base register. It
14254 guarantees that SP is reset correctly when an LDM instruction
14255 is interrupted. Otherwise, we might end up with a corrupt stack. */
14256 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14257 || !MEM_P (mem)
14258 || GET_MODE (mem) != mode
14259 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14260 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14261 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14262 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14263 offset + (i - base) * reg_increment))
14264 && (!REG_P (XEXP (mem, 0))
14265 || offset + (i - base) * reg_increment != 0)))
14266 return false;
14267
14268 regno = REGNO (reg);
14269 if (regno == REGNO (addr))
14270 addr_reg_in_reglist = true;
14271 }
14272
14273 if (load)
14274 {
14275 if (update && addr_reg_in_reglist)
14276 return false;
14277
14278 /* For Thumb-1, the address register is always modified - either by write-back
14279 or by an explicit load. If the pattern does not describe an update,
14280 then the address register must be in the list of loaded registers. */
14281 if (TARGET_THUMB1)
14282 return update || addr_reg_in_reglist;
14283 }
14284
14285 return true;
14286 }
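
As an illustration of the PARALLEL shape this predicate accepts, here is a minimal, hedged sketch that builds a two-register LDM pattern (r4 and r5 loaded from [r0] and [r0 + 4]) which ldm_stm_operation_p (op, true, SImode, false, false) would recognise. It assumes it is compiled inside this backend, where the RTL constructors are available; build_two_reg_ldm itself is a hypothetical helper, not part of arm.cc.

/* Hypothetical example only: construct a PARALLEL matching the load
   pattern documented above (two SImode loads, ascending registers,
   offsets 0 and 4 from the same base register r0).  */
static rtx
build_two_reg_ldm (void)
{
  rtx base = gen_rtx_REG (SImode, 0);               /* r0 is the base.  */
  rtx set0 = gen_rtx_SET (gen_rtx_REG (SImode, 4),  /* r4 <- [r0]       */
                          gen_rtx_MEM (SImode, base));
  rtx set1 = gen_rtx_SET (gen_rtx_REG (SImode, 5),  /* r5 <- [r0 + 4]   */
                          gen_rtx_MEM (SImode,
                                       plus_constant (Pmode, base, 4)));
  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set0, set1));
}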
14287
14288 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14289 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14290 following form:
14291
14292 [(set (reg:SI <N>) (const_int 0))
14293 (set (reg:SI <M>) (const_int 0))
14294 ...
14295 (unspec_volatile [(const_int 0)]
14296 VUNSPEC_CLRM_APSR)
14297 (clobber (reg:CC CC_REGNUM))
14298 ]
14299
14300 Any number (including 0) of set expressions is valid; the volatile unspec is
14301 optional. All registers but SP and PC are allowed and registers must be in
14302 strict increasing order.
14303
14304 To be a valid VSCCLRM pattern, OP must have the following form:
14305
14306 [(unspec_volatile [(const_int 0)]
14307 VUNSPEC_VSCCLRM_VPR)
14308 (set (reg:SF <N>) (const_int 0))
14309 (set (reg:SF <M>) (const_int 0))
14310 ...
14311 ]
14312
14313 As with CLRM, any number (including 0) of set expressions is valid; however,
14314 the volatile unspec is mandatory here. Any VFP single-precision register is
14315 accepted but all registers must be consecutive and in increasing order. */
14316
14317 bool
14318 clear_operation_p (rtx op, bool vfp)
14319 {
14320 unsigned regno;
14321 unsigned last_regno = INVALID_REGNUM;
14322 rtx elt, reg, zero;
14323 int count = XVECLEN (op, 0);
14324 int first_set = vfp ? 1 : 0;
14325 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14326
14327 for (int i = first_set; i < count; i++)
14328 {
14329 elt = XVECEXP (op, 0, i);
14330
14331 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14332 {
14333 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14334 || XVECLEN (elt, 0) != 1
14335 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14336 || i != count - 2)
14337 return false;
14338
14339 continue;
14340 }
14341
14342 if (GET_CODE (elt) == CLOBBER)
14343 continue;
14344
14345 if (GET_CODE (elt) != SET)
14346 return false;
14347
14348 reg = SET_DEST (elt);
14349 zero = SET_SRC (elt);
14350
14351 if (!REG_P (reg)
14352 || GET_MODE (reg) != expected_mode
14353 || zero != CONST0_RTX (SImode))
14354 return false;
14355
14356 regno = REGNO (reg);
14357
14358 if (vfp)
14359 {
14360 if (i != first_set && regno != last_regno + 1)
14361 return false;
14362 }
14363 else
14364 {
14365 if (regno == SP_REGNUM || regno == PC_REGNUM)
14366 return false;
14367 if (i != first_set && regno <= last_regno)
14368 return false;
14369 }
14370
14371 last_regno = regno;
14372 }
14373
14374 return true;
14375 }
14376
14377 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14378 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14379 instruction. ADD_OFFSET is nonzero if the base address register needs
14380 to be modified with an add instruction before we can use it. */
14381
14382 static bool
14383 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14384 int nops, HOST_WIDE_INT add_offset)
14385 {
14386 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14387 if the offset isn't small enough. The reason 2 ldrs are faster
14388 is that these ARMs are able to do more than one cache access
14389 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14390 whilst the ARM8 has a double bandwidth cache. This means that
14391 these cores can do both an instruction fetch and a data fetch in
14392 a single cycle, so the trick of calculating the address into a
14393 scratch register (one of the result regs) and then doing a load
14394 multiple actually becomes slower (and no smaller in code size).
14395 That is the transformation
14396
14397 ldr rd1, [rbase + offset]
14398 ldr rd2, [rbase + offset + 4]
14399
14400 to
14401
14402 add rd1, rbase, offset
14403 ldmia rd1, {rd1, rd2}
14404
14405 produces worse code -- '3 cycles + any stalls on rd2' instead of
14406 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14407 access per cycle, the first sequence could never complete in less
14408 than 6 cycles, whereas the ldm sequence would only take 5 and
14409 would make better use of sequential accesses if not hitting the
14410 cache.
14411
14412 We cheat here and test 'arm_ld_sched' which we currently know to
14413 only be true for the ARM8, ARM9 and StrongARM. If this ever
14414 changes, then the test below needs to be reworked. */
14415 if (nops == 2 && arm_ld_sched && add_offset != 0)
14416 return false;
14417
14418 /* XScale has load-store double instructions, but they have stricter
14419 alignment requirements than load-store multiple, so we cannot
14420 use them.
14421
14422 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14423 the pipeline until completion.
14424
14425 NREGS CYCLES
14426 1 3
14427 2 4
14428 3 5
14429 4 6
14430
14431 An ldr instruction takes 1-3 cycles, but does not block the
14432 pipeline.
14433
14434 NREGS CYCLES
14435 1 1-3
14436 2 2-6
14437 3 3-9
14438 4 4-12
14439
14440 Best case ldr will always win. However, the more ldr instructions
14441 we issue, the less likely we are to be able to schedule them well.
14442 Using ldr instructions also increases code size.
14443
14444 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14445 for counts of 3 or 4 regs. */
14446 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14447 return false;
14448 return true;
14449 }
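
The cycle figures quoted in the comment above can be tabulated directly. The following standalone sketch is only an illustration of that cost model (the numbers are copied from the comment, not derived from a separate pipeline description) and shows why ldr is preferred for 1-2 registers and ldm for 3-4 on XScale.

/* Illustration only: XScale ldm blocks the pipeline for 2 + NREGS cycles,
   while NREGS independent ldr insns take between NREGS and 3 * NREGS.  */
#include <cstdio>

int
main ()
{
  for (int nregs = 1; nregs <= 4; nregs++)
    {
      int ldm_cycles = 2 + nregs;   /* ldm blocks the pipeline this long.  */
      int ldr_best = nregs;         /* 1 cycle per ldr, no stalls.         */
      int ldr_worst = 3 * nregs;    /* 3 cycles per ldr.                   */
      printf ("NREGS=%d  ldm=%d  ldr=%d..%d\n",
              nregs, ldm_cycles, ldr_best, ldr_worst);
    }
  return 0;
}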
14450
14451 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14452 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14453 an array ORDER which describes the sequence in which to access the
14454 offsets so that they end up in ascending order. In this sequence, each
14455 offset must be larger by exactly 4 than the previous one. ORDER[0]
14456 must have been filled in with the lowest offset by the caller.
14457 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14458 we use to verify that ORDER produces an ascending order of registers.
14459 Return true if it was possible to construct such an order, false if
14460 not. */
14461
14462 static bool
14463 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14464 int *unsorted_regs)
14465 {
14466 int i;
14467 for (i = 1; i < nops; i++)
14468 {
14469 int j;
14470
14471 order[i] = order[i - 1];
14472 for (j = 0; j < nops; j++)
14473 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14474 {
14475 /* We must find exactly one offset that is higher than the
14476 previous one by 4. */
14477 if (order[i] != order[i - 1])
14478 return false;
14479 order[i] = j;
14480 }
14481 if (order[i] == order[i - 1])
14482 return false;
14483 /* The register numbers must be ascending. */
14484 if (unsorted_regs != NULL
14485 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14486 return false;
14487 }
14488 return true;
14489 }
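
For illustration, the same ordering check can be run on plain arrays. The sketch below mirrors the loop above (minus the register check); offsets {8, 0, 4}, with order[0] preset to the index of the lowest offset, produce the order 1, 2, 0, while a gap such as {0, 8} makes it fail, exactly as compute_offset_order does. This is a standalone toy, not backend code.

#include <cstdio>

static bool
toy_offset_order (int nops, const long *offsets, int *order)
{
  for (int i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (int j = 0; j < nops; j++)
        if (offsets[j] == offsets[order[i - 1]] + 4)
          {
            /* Exactly one offset may be 4 above the previous one.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;           /* No offset is 4 above the previous one.  */
    }
  return true;
}

int
main ()
{
  const long offsets[] = { 8, 0, 4 };
  int order[3] = { 1, 0, 0 };   /* order[0] = index of the lowest offset.  */
  if (toy_offset_order (3, offsets, order))
    printf ("order: %d %d %d\n", order[0], order[1], order[2]);
  return 0;
}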
14490
14491 /* Used to determine in a peephole whether a sequence of load
14492 instructions can be changed into a load-multiple instruction.
14493 NOPS is the number of separate load instructions we are examining. The
14494 first NOPS entries in OPERANDS are the destination registers, the
14495 next NOPS entries are memory operands. If this function is
14496 successful, *BASE is set to the common base register of the memory
14497 accesses; *LOAD_OFFSET is set to the first memory location's offset
14498 from that base register.
14499 REGS is an array filled in with the destination register numbers.
14500 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14501 insn numbers to an ascending order of stores. If CHECK_REGS is true,
14502 the sequence of registers in REGS matches the loads from ascending memory
14503 locations, and the function verifies that the register numbers are
14504 themselves ascending. If CHECK_REGS is false, the register numbers
14505 are stored in the order they are found in the operands. */
14506 static int
14507 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14508 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14509 {
14510 int unsorted_regs[MAX_LDM_STM_OPS];
14511 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14512 int order[MAX_LDM_STM_OPS];
14513 int base_reg = -1;
14514 int i, ldm_case;
14515
14516 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14517 easily extended if required. */
14518 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14519
14520 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14521
14522 /* Loop over the operands and check that the memory references are
14523 suitable (i.e. immediate offsets from the same base register). At
14524 the same time, extract the target register, and the memory
14525 offsets. */
14526 for (i = 0; i < nops; i++)
14527 {
14528 rtx reg;
14529 rtx offset;
14530
14531 /* Convert a subreg of a mem into the mem itself. */
14532 if (GET_CODE (operands[nops + i]) == SUBREG)
14533 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14534
14535 gcc_assert (MEM_P (operands[nops + i]));
14536
14537 /* Don't reorder volatile memory references; it doesn't seem worth
14538 looking for the case where the order is ok anyway. */
14539 if (MEM_VOLATILE_P (operands[nops + i]))
14540 return 0;
14541
14542 offset = const0_rtx;
14543
14544 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14545 || (SUBREG_P (reg)
14546 && REG_P (reg = SUBREG_REG (reg))))
14547 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14548 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14549 || (SUBREG_P (reg)
14550 && REG_P (reg = SUBREG_REG (reg))))
14551 && (CONST_INT_P (offset
14552 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14553 {
14554 if (i == 0)
14555 {
14556 base_reg = REGNO (reg);
14557 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14558 return 0;
14559 }
14560 else if (base_reg != (int) REGNO (reg))
14561 /* Not addressed from the same base register. */
14562 return 0;
14563
14564 unsorted_regs[i] = (REG_P (operands[i])
14565 ? REGNO (operands[i])
14566 : REGNO (SUBREG_REG (operands[i])));
14567
14568 /* If it isn't an integer register, or if it overwrites the
14569 base register but isn't the last insn in the list, then
14570 we can't do this. */
14571 if (unsorted_regs[i] < 0
14572 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14573 || unsorted_regs[i] > 14
14574 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14575 return 0;
14576
14577 /* Don't allow SP to be loaded unless it is also the base
14578 register. It guarantees that SP is reset correctly when
14579 an LDM instruction is interrupted. Otherwise, we might
14580 end up with a corrupt stack. */
14581 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14582 return 0;
14583
14584 unsorted_offsets[i] = INTVAL (offset);
14585 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14586 order[0] = i;
14587 }
14588 else
14589 /* Not a suitable memory address. */
14590 return 0;
14591 }
14592
14593 /* All the useful information has now been extracted from the
14594 operands into unsorted_regs and unsorted_offsets; additionally,
14595 order[0] has been set to the lowest offset in the list. Sort
14596 the offsets into order, verifying that they are adjacent, and
14597 check that the register numbers are ascending. */
14598 if (!compute_offset_order (nops, unsorted_offsets, order,
14599 check_regs ? unsorted_regs : NULL))
14600 return 0;
14601
14602 if (saved_order)
14603 memcpy (saved_order, order, sizeof order);
14604
14605 if (base)
14606 {
14607 *base = base_reg;
14608
14609 for (i = 0; i < nops; i++)
14610 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14611
14612 *load_offset = unsorted_offsets[order[0]];
14613 }
14614
14615 if (unsorted_offsets[order[0]] == 0)
14616 ldm_case = 1; /* ldmia */
14617 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14618 ldm_case = 2; /* ldmib */
14619 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14620 ldm_case = 3; /* ldmda */
14621 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14622 ldm_case = 4; /* ldmdb */
14623 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14624 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14625 ldm_case = 5;
14626 else
14627 return 0;
14628
14629 if (!multiple_operation_profitable_p (false, nops,
14630 ldm_case == 5
14631 ? unsorted_offsets[order[0]] : 0))
14632 return 0;
14633
14634 return ldm_case;
14635 }
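
A worked example of the ldm_case classification performed above, assuming TARGET_ARM and the sorted offsets computed by compute_offset_order (illustration only; the offset values are made up):

/* lowest offset 0               e.g. 0, 4, 8      -> case 1 (ldmia)
   lowest offset 4               e.g. 4, 8, 12     -> case 2 (ldmib)
   highest offset 0              e.g. -8, -4, 0    -> case 3 (ldmda)
   highest offset -4             e.g. -12, -8, -4  -> case 4 (ldmdb)
   other, but the lowest offset
   is a valid add/sub immediate  e.g. 256, 260     -> case 5 (adjust the
                                                    base with an add first).  */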
14636
14637 /* Used to determine in a peephole whether a sequence of store instructions can
14638 be changed into a store-multiple instruction.
14639 NOPS is the number of separate store instructions we are examining.
14640 NOPS_TOTAL is the total number of instructions recognized by the peephole
14641 pattern.
14642 The first NOPS entries in OPERANDS are the source registers, the next
14643 NOPS entries are memory operands. If this function is successful, *BASE is
14644 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14645 to the first memory location's offset from that base register. REGS is an
14646 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14647 likewise filled with the corresponding rtx's.
14648 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14649 numbers to an ascending order of stores.
14650 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14651 from ascending memory locations, and the function verifies that the register
14652 numbers are themselves ascending. If CHECK_REGS is false, the register
14653 numbers are stored in the order they are found in the operands. */
14654 static int
14655 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14656 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14657 HOST_WIDE_INT *load_offset, bool check_regs)
14658 {
14659 int unsorted_regs[MAX_LDM_STM_OPS];
14660 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14661 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14662 int order[MAX_LDM_STM_OPS];
14663 int base_reg = -1;
14664 rtx base_reg_rtx = NULL;
14665 int i, stm_case;
14666
14667 /* Write back of base register is currently only supported for Thumb 1. */
14668 int base_writeback = TARGET_THUMB1;
14669
14670 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14671 easily extended if required. */
14672 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14673
14674 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14675
14676 /* Loop over the operands and check that the memory references are
14677 suitable (i.e. immediate offsets from the same base register). At
14678 the same time, extract the target register, and the memory
14679 offsets. */
14680 for (i = 0; i < nops; i++)
14681 {
14682 rtx reg;
14683 rtx offset;
14684
14685 /* Convert a subreg of a mem into the mem itself. */
14686 if (GET_CODE (operands[nops + i]) == SUBREG)
14687 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14688
14689 gcc_assert (MEM_P (operands[nops + i]));
14690
14691 /* Don't reorder volatile memory references; it doesn't seem worth
14692 looking for the case where the order is ok anyway. */
14693 if (MEM_VOLATILE_P (operands[nops + i]))
14694 return 0;
14695
14696 offset = const0_rtx;
14697
14698 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14699 || (SUBREG_P (reg)
14700 && REG_P (reg = SUBREG_REG (reg))))
14701 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14702 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14703 || (SUBREG_P (reg)
14704 && REG_P (reg = SUBREG_REG (reg))))
14705 && (CONST_INT_P (offset
14706 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14707 {
14708 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14709 ? operands[i] : SUBREG_REG (operands[i]));
14710 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14711
14712 if (i == 0)
14713 {
14714 base_reg = REGNO (reg);
14715 base_reg_rtx = reg;
14716 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14717 return 0;
14718 }
14719 else if (base_reg != (int) REGNO (reg))
14720 /* Not addressed from the same base register. */
14721 return 0;
14722
14723 /* If it isn't an integer register, then we can't do this. */
14724 if (unsorted_regs[i] < 0
14725 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14726 /* The effects are unpredictable if the base register is
14727 both updated and stored. */
14728 || (base_writeback && unsorted_regs[i] == base_reg)
14729 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14730 || unsorted_regs[i] > 14)
14731 return 0;
14732
14733 unsorted_offsets[i] = INTVAL (offset);
14734 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14735 order[0] = i;
14736 }
14737 else
14738 /* Not a suitable memory address. */
14739 return 0;
14740 }
14741
14742 /* All the useful information has now been extracted from the
14743 operands into unsorted_regs and unsorted_offsets; additionally,
14744 order[0] has been set to the lowest offset in the list. Sort
14745 the offsets into order, verifying that they are adjacent, and
14746 check that the register numbers are ascending. */
14747 if (!compute_offset_order (nops, unsorted_offsets, order,
14748 check_regs ? unsorted_regs : NULL))
14749 return 0;
14750
14751 if (saved_order)
14752 memcpy (saved_order, order, sizeof order);
14753
14754 if (base)
14755 {
14756 *base = base_reg;
14757
14758 for (i = 0; i < nops; i++)
14759 {
14760 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14761 if (reg_rtxs)
14762 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14763 }
14764
14765 *load_offset = unsorted_offsets[order[0]];
14766 }
14767
14768 if (TARGET_THUMB1
14769 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14770 return 0;
14771
14772 if (unsorted_offsets[order[0]] == 0)
14773 stm_case = 1; /* stmia */
14774 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14775 stm_case = 2; /* stmib */
14776 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14777 stm_case = 3; /* stmda */
14778 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14779 stm_case = 4; /* stmdb */
14780 else
14781 return 0;
14782
14783 if (!multiple_operation_profitable_p (false, nops, 0))
14784 return 0;
14785
14786 return stm_case;
14787 }
14788 \f
14789 /* Routines for use in generating RTL. */
14790
14791 /* Generate a load-multiple instruction. COUNT is the number of loads in
14792 the instruction; REGS and MEMS are arrays containing the operands.
14793 BASEREG is the base register to be used in addressing the memory operands.
14794 WBACK_OFFSET is nonzero if the instruction should update the base
14795 register. */
14796
14797 static rtx
14798 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14799 HOST_WIDE_INT wback_offset)
14800 {
14801 int i = 0, j;
14802 rtx result;
14803
14804 if (!multiple_operation_profitable_p (false, count, 0))
14805 {
14806 rtx seq;
14807
14808 start_sequence ();
14809
14810 for (i = 0; i < count; i++)
14811 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14812
14813 if (wback_offset != 0)
14814 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14815
14816 seq = get_insns ();
14817 end_sequence ();
14818
14819 return seq;
14820 }
14821
14822 result = gen_rtx_PARALLEL (VOIDmode,
14823 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14824 if (wback_offset != 0)
14825 {
14826 XVECEXP (result, 0, 0)
14827 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14828 i = 1;
14829 count++;
14830 }
14831
14832 for (j = 0; i < count; i++, j++)
14833 XVECEXP (result, 0, i)
14834 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14835
14836 return result;
14837 }
14838
14839 /* Generate a store-multiple instruction. COUNT is the number of stores in
14840 the instruction; REGS and MEMS are arrays containing the operands.
14841 BASEREG is the base register to be used in addressing the memory operands.
14842 WBACK_OFFSET is nonzero if the instruction should update the base
14843 register. */
14844
14845 static rtx
14846 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14847 HOST_WIDE_INT wback_offset)
14848 {
14849 int i = 0, j;
14850 rtx result;
14851
14852 if (GET_CODE (basereg) == PLUS)
14853 basereg = XEXP (basereg, 0);
14854
14855 if (!multiple_operation_profitable_p (false, count, 0))
14856 {
14857 rtx seq;
14858
14859 start_sequence ();
14860
14861 for (i = 0; i < count; i++)
14862 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14863
14864 if (wback_offset != 0)
14865 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14866
14867 seq = get_insns ();
14868 end_sequence ();
14869
14870 return seq;
14871 }
14872
14873 result = gen_rtx_PARALLEL (VOIDmode,
14874 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14875 if (wback_offset != 0)
14876 {
14877 XVECEXP (result, 0, 0)
14878 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14879 i = 1;
14880 count++;
14881 }
14882
14883 for (j = 0; i < count; i++, j++)
14884 XVECEXP (result, 0, i)
14885 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14886
14887 return result;
14888 }
14889
14890 /* Generate either a load-multiple or a store-multiple instruction. This
14891 function can be used in situations where we can start with a single MEM
14892 rtx and adjust its address upwards.
14893 COUNT is the number of operations in the instruction, not counting a
14894 possible update of the base register. REGS is an array containing the
14895 register operands.
14896 BASEREG is the base register to be used in addressing the memory operands,
14897 which are constructed from BASEMEM.
14898 WRITE_BACK specifies whether the generated instruction should include an
14899 update of the base register.
14900 OFFSETP is used to pass an offset to and from this function; this offset
14901 is not used when constructing the address (instead BASEMEM should have an
14902 appropriate offset in its address); it is used only for setting
14903 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14904
14905 static rtx
14906 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14907 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14908 {
14909 rtx mems[MAX_LDM_STM_OPS];
14910 HOST_WIDE_INT offset = *offsetp;
14911 int i;
14912
14913 gcc_assert (count <= MAX_LDM_STM_OPS);
14914
14915 if (GET_CODE (basereg) == PLUS)
14916 basereg = XEXP (basereg, 0);
14917
14918 for (i = 0; i < count; i++)
14919 {
14920 rtx addr = plus_constant (Pmode, basereg, i * 4);
14921 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14922 offset += 4;
14923 }
14924
14925 if (write_back)
14926 *offsetp = offset;
14927
14928 if (is_load)
14929 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14930 write_back ? 4 * count : 0);
14931 else
14932 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14933 write_back ? 4 * count : 0);
14934 }
14935
14936 rtx
14937 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14938 rtx basemem, HOST_WIDE_INT *offsetp)
14939 {
14940 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14941 offsetp);
14942 }
14943
14944 rtx
14945 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14946 rtx basemem, HOST_WIDE_INT *offsetp)
14947 {
14948 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14949 offsetp);
14950 }
14951
14952 /* Called from a peephole2 expander to turn a sequence of loads into an
14953 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14954 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14955 is true if we can reorder the registers because the loaded values are
14956 subsequently used commutatively.
14957 Returns true iff we could generate a new instruction. */
14958
14959 bool
14960 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14961 {
14962 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14963 rtx mems[MAX_LDM_STM_OPS];
14964 int i, j, base_reg;
14965 rtx base_reg_rtx;
14966 HOST_WIDE_INT offset;
14967 int write_back = FALSE;
14968 int ldm_case;
14969 rtx addr;
14970
14971 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14972 &base_reg, &offset, !sort_regs);
14973
14974 if (ldm_case == 0)
14975 return false;
14976
14977 if (sort_regs)
14978 for (i = 0; i < nops - 1; i++)
14979 for (j = i + 1; j < nops; j++)
14980 if (regs[i] > regs[j])
14981 {
14982 int t = regs[i];
14983 regs[i] = regs[j];
14984 regs[j] = t;
14985 }
14986 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14987
14988 if (TARGET_THUMB1)
14989 {
14990 gcc_assert (ldm_case == 1 || ldm_case == 5);
14991
14992 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14993 write_back = true;
14994 for (i = 0; i < nops; i++)
14995 if (base_reg == regs[i])
14996 write_back = false;
14997
14998 /* Ensure the base is dead if it is updated. */
14999 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15000 return false;
15001 }
15002
15003 if (ldm_case == 5)
15004 {
15005 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15006 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15007 offset = 0;
15008 base_reg_rtx = newbase;
15009 }
15010
15011 for (i = 0; i < nops; i++)
15012 {
15013 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15014 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15015 SImode, addr, 0);
15016 }
15017 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15018 write_back ? offset + i * 4 : 0));
15019 return true;
15020 }
15021
15022 /* Called from a peephole2 expander to turn a sequence of stores into an
15023 STM instruction. OPERANDS are the operands found by the peephole matcher;
15024 NOPS indicates how many separate stores we are trying to combine.
15025 Returns true iff we could generate a new instruction. */
15026
15027 bool
15028 gen_stm_seq (rtx *operands, int nops)
15029 {
15030 int i;
15031 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15032 rtx mems[MAX_LDM_STM_OPS];
15033 int base_reg;
15034 rtx base_reg_rtx;
15035 HOST_WIDE_INT offset;
15036 int write_back = FALSE;
15037 int stm_case;
15038 rtx addr;
15039 bool base_reg_dies;
15040
15041 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15042 mem_order, &base_reg, &offset, true);
15043
15044 if (stm_case == 0)
15045 return false;
15046
15047 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15048
15049 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15050 if (TARGET_THUMB1)
15051 {
15052 gcc_assert (base_reg_dies);
15053 write_back = TRUE;
15054 }
15055
15056 if (stm_case == 5)
15057 {
15058 gcc_assert (base_reg_dies);
15059 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15060 offset = 0;
15061 }
15062
15063 addr = plus_constant (Pmode, base_reg_rtx, offset);
15064
15065 for (i = 0; i < nops; i++)
15066 {
15067 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15068 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15069 SImode, addr, 0);
15070 }
15071 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15072 write_back ? offset + i * 4 : 0));
15073 return true;
15074 }
15075
15076 /* Called from a peephole2 expander to turn a sequence of stores that are
15077 preceded by constant loads into an STM instruction. OPERANDS are the
15078 operands found by the peephole matcher; NOPS indicates how many
15079 separate stores we are trying to combine; there are 2 * NOPS
15080 instructions in the peephole.
15081 Returns true iff we could generate a new instruction. */
15082
15083 bool
15084 gen_const_stm_seq (rtx *operands, int nops)
15085 {
15086 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15087 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15088 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15089 rtx mems[MAX_LDM_STM_OPS];
15090 int base_reg;
15091 rtx base_reg_rtx;
15092 HOST_WIDE_INT offset;
15093 int write_back = FALSE;
15094 int stm_case;
15095 rtx addr;
15096 bool base_reg_dies;
15097 int i, j;
15098 HARD_REG_SET allocated;
15099
15100 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15101 mem_order, &base_reg, &offset, false);
15102
15103 if (stm_case == 0)
15104 return false;
15105
15106 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15107
15108 /* If the same register is used more than once, try to find a free
15109 register. */
15110 CLEAR_HARD_REG_SET (allocated);
15111 for (i = 0; i < nops; i++)
15112 {
15113 for (j = i + 1; j < nops; j++)
15114 if (regs[i] == regs[j])
15115 {
15116 rtx t = peep2_find_free_register (0, nops * 2,
15117 TARGET_THUMB1 ? "l" : "r",
15118 SImode, &allocated);
15119 if (t == NULL_RTX)
15120 return false;
15121 reg_rtxs[i] = t;
15122 regs[i] = REGNO (t);
15123 }
15124 }
15125
15126 /* Compute an ordering that maps the register numbers to an ascending
15127 sequence. */
15128 reg_order[0] = 0;
15129 for (i = 0; i < nops; i++)
15130 if (regs[i] < regs[reg_order[0]])
15131 reg_order[0] = i;
15132
15133 for (i = 1; i < nops; i++)
15134 {
15135 int this_order = reg_order[i - 1];
15136 for (j = 0; j < nops; j++)
15137 if (regs[j] > regs[reg_order[i - 1]]
15138 && (this_order == reg_order[i - 1]
15139 || regs[j] < regs[this_order]))
15140 this_order = j;
15141 reg_order[i] = this_order;
15142 }
15143
15144 /* Ensure that registers that must be live after the instruction end
15145 up with the correct value. */
15146 for (i = 0; i < nops; i++)
15147 {
15148 int this_order = reg_order[i];
15149 if ((this_order != mem_order[i]
15150 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15151 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15152 return false;
15153 }
15154
15155 /* Load the constants. */
15156 for (i = 0; i < nops; i++)
15157 {
15158 rtx op = operands[2 * nops + mem_order[i]];
15159 sorted_regs[i] = regs[reg_order[i]];
15160 emit_move_insn (reg_rtxs[reg_order[i]], op);
15161 }
15162
15163 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15164
15165 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15166 if (TARGET_THUMB1)
15167 {
15168 gcc_assert (base_reg_dies);
15169 write_back = TRUE;
15170 }
15171
15172 if (stm_case == 5)
15173 {
15174 gcc_assert (base_reg_dies);
15175 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15176 offset = 0;
15177 }
15178
15179 addr = plus_constant (Pmode, base_reg_rtx, offset);
15180
15181 for (i = 0; i < nops; i++)
15182 {
15183 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15184 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15185 SImode, addr, 0);
15186 }
15187 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15188 write_back ? offset + i * 4 : 0));
15189 return true;
15190 }
15191
15192 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15193 unaligned copies on processors which support unaligned semantics for those
15194 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15195 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15196 An interleave factor of 1 (the minimum) will perform no interleaving.
15197 Load/store multiple are used for aligned addresses where possible. */
15198
15199 static void
15200 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15201 HOST_WIDE_INT length,
15202 unsigned int interleave_factor)
15203 {
15204 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15205 int *regnos = XALLOCAVEC (int, interleave_factor);
15206 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15207 HOST_WIDE_INT i, j;
15208 HOST_WIDE_INT remaining = length, words;
15209 rtx halfword_tmp = NULL, byte_tmp = NULL;
15210 rtx dst, src;
15211 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15212 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15213 HOST_WIDE_INT srcoffset, dstoffset;
15214 HOST_WIDE_INT src_autoinc, dst_autoinc;
15215 rtx mem, addr;
15216
15217 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15218
15219 /* Use hard registers if we have aligned source or destination so we can use
15220 load/store multiple with contiguous registers. */
15221 if (dst_aligned || src_aligned)
15222 for (i = 0; i < interleave_factor; i++)
15223 regs[i] = gen_rtx_REG (SImode, i);
15224 else
15225 for (i = 0; i < interleave_factor; i++)
15226 regs[i] = gen_reg_rtx (SImode);
15227
15228 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15229 src = copy_addr_to_reg (XEXP (srcbase, 0));
15230
15231 srcoffset = dstoffset = 0;
15232
15233 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15234 For copying the last bytes we want to subtract this offset again. */
15235 src_autoinc = dst_autoinc = 0;
15236
15237 for (i = 0; i < interleave_factor; i++)
15238 regnos[i] = i;
15239
15240 /* Copy BLOCK_SIZE_BYTES chunks. */
15241
15242 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15243 {
15244 /* Load words. */
15245 if (src_aligned && interleave_factor > 1)
15246 {
15247 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15248 TRUE, srcbase, &srcoffset));
15249 src_autoinc += UNITS_PER_WORD * interleave_factor;
15250 }
15251 else
15252 {
15253 for (j = 0; j < interleave_factor; j++)
15254 {
15255 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15256 - src_autoinc));
15257 mem = adjust_automodify_address (srcbase, SImode, addr,
15258 srcoffset + j * UNITS_PER_WORD);
15259 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15260 }
15261 srcoffset += block_size_bytes;
15262 }
15263
15264 /* Store words. */
15265 if (dst_aligned && interleave_factor > 1)
15266 {
15267 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15268 TRUE, dstbase, &dstoffset));
15269 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15270 }
15271 else
15272 {
15273 for (j = 0; j < interleave_factor; j++)
15274 {
15275 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15276 - dst_autoinc));
15277 mem = adjust_automodify_address (dstbase, SImode, addr,
15278 dstoffset + j * UNITS_PER_WORD);
15279 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15280 }
15281 dstoffset += block_size_bytes;
15282 }
15283
15284 remaining -= block_size_bytes;
15285 }
15286
15287 /* Copy any whole words left (note these aren't interleaved with any
15288 subsequent halfword/byte load/stores in the interests of simplicity). */
15289
15290 words = remaining / UNITS_PER_WORD;
15291
15292 gcc_assert (words < interleave_factor);
15293
15294 if (src_aligned && words > 1)
15295 {
15296 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15297 &srcoffset));
15298 src_autoinc += UNITS_PER_WORD * words;
15299 }
15300 else
15301 {
15302 for (j = 0; j < words; j++)
15303 {
15304 addr = plus_constant (Pmode, src,
15305 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15306 mem = adjust_automodify_address (srcbase, SImode, addr,
15307 srcoffset + j * UNITS_PER_WORD);
15308 if (src_aligned)
15309 emit_move_insn (regs[j], mem);
15310 else
15311 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15312 }
15313 srcoffset += words * UNITS_PER_WORD;
15314 }
15315
15316 if (dst_aligned && words > 1)
15317 {
15318 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15319 &dstoffset));
15320 dst_autoinc += words * UNITS_PER_WORD;
15321 }
15322 else
15323 {
15324 for (j = 0; j < words; j++)
15325 {
15326 addr = plus_constant (Pmode, dst,
15327 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15328 mem = adjust_automodify_address (dstbase, SImode, addr,
15329 dstoffset + j * UNITS_PER_WORD);
15330 if (dst_aligned)
15331 emit_move_insn (mem, regs[j]);
15332 else
15333 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15334 }
15335 dstoffset += words * UNITS_PER_WORD;
15336 }
15337
15338 remaining -= words * UNITS_PER_WORD;
15339
15340 gcc_assert (remaining < 4);
15341
15342 /* Copy a halfword if necessary. */
15343
15344 if (remaining >= 2)
15345 {
15346 halfword_tmp = gen_reg_rtx (SImode);
15347
15348 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15349 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15350 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15351
15352 /* Either write out immediately, or delay until we've loaded the last
15353 byte, depending on interleave factor. */
15354 if (interleave_factor == 1)
15355 {
15356 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15357 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15358 emit_insn (gen_unaligned_storehi (mem,
15359 gen_lowpart (HImode, halfword_tmp)));
15360 halfword_tmp = NULL;
15361 dstoffset += 2;
15362 }
15363
15364 remaining -= 2;
15365 srcoffset += 2;
15366 }
15367
15368 gcc_assert (remaining < 2);
15369
15370 /* Copy last byte. */
15371
15372 if ((remaining & 1) != 0)
15373 {
15374 byte_tmp = gen_reg_rtx (SImode);
15375
15376 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15377 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15378 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15379
15380 if (interleave_factor == 1)
15381 {
15382 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15383 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15384 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15385 byte_tmp = NULL;
15386 dstoffset++;
15387 }
15388
15389 remaining--;
15390 srcoffset++;
15391 }
15392
15393 /* Store last halfword if we haven't done so already. */
15394
15395 if (halfword_tmp)
15396 {
15397 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15398 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15399 emit_insn (gen_unaligned_storehi (mem,
15400 gen_lowpart (HImode, halfword_tmp)));
15401 dstoffset += 2;
15402 }
15403
15404 /* Likewise for last byte. */
15405
15406 if (byte_tmp)
15407 {
15408 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15409 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15410 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15411 dstoffset++;
15412 }
15413
15414 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15415 }
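
A C-level sketch of the interleaving performed above for a factor of 2 on unaligned buffers: both loads are issued before the matching stores so each load has time to complete before its value is needed. This is an illustration of the schedule only; the real routine emits RTL, and the helper name is made up.

#include <cstring>
#include <cstddef>
#include <cstdint>

static void
interleaved_word_copy_sketch (unsigned char *dst, const unsigned char *src,
                              size_t words /* assumed to be a multiple of 2 */)
{
  uint32_t r0, r1;
  for (size_t i = 0; i < words; i += 2)
    {
      memcpy (&r0, src + 4 * i, 4);        /* load word 0   */
      memcpy (&r1, src + 4 * (i + 1), 4);  /* load word 1   */
      memcpy (dst + 4 * i, &r0, 4);        /* store word 0  */
      memcpy (dst + 4 * (i + 1), &r1, 4);  /* store word 1  */
    }
}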
15416
15417 /* From mips_adjust_block_mem:
15418
15419 Helper function for doing a loop-based block operation on memory
15420 reference MEM. Each iteration of the loop will operate on LENGTH
15421 bytes of MEM.
15422
15423 Create a new base register for use within the loop and point it to
15424 the start of MEM. Create a new memory reference that uses this
15425 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15426
15427 static void
15428 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15429 rtx *loop_mem)
15430 {
15431 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15432
15433 /* Although the new mem does not refer to a known location,
15434 it does keep up to LENGTH bytes of alignment. */
15435 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15436 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15437 }
15438
15439 /* From mips_block_move_loop:
15440
15441 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15442 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15443 the memory regions do not overlap. */
15444
15445 static void
15446 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15447 unsigned int interleave_factor,
15448 HOST_WIDE_INT bytes_per_iter)
15449 {
15450 rtx src_reg, dest_reg, final_src, test;
15451 HOST_WIDE_INT leftover;
15452
15453 leftover = length % bytes_per_iter;
15454 length -= leftover;
15455
15456 /* Create registers and memory references for use within the loop. */
15457 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15458 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15459
15460 /* Calculate the value that SRC_REG should have after the last iteration of
15461 the loop. */
15462 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15463 0, 0, OPTAB_WIDEN);
15464
15465 /* Emit the start of the loop. */
15466 rtx_code_label *label = gen_label_rtx ();
15467 emit_label (label);
15468
15469 /* Emit the loop body. */
15470 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15471 interleave_factor);
15472
15473 /* Move on to the next block. */
15474 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15475 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15476
15477 /* Emit the loop condition. */
15478 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15479 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15480
15481 /* Mop up any left-over bytes. */
15482 if (leftover)
15483 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15484 }
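
The structure of the code emitted above corresponds to the following C-level sketch (illustration only, under the same no-overlap assumption; the real routine emits RTL rather than calling memcpy):

#include <cstring>
#include <cstddef>

static void
block_move_loop_sketch (unsigned char *dst, const unsigned char *src,
                        size_t length, size_t bytes_per_iter)
{
  size_t leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);

  /* Main loop: copy one block per iteration until SRC reaches the
     precomputed final value, mirroring the cbranchsi4 test above.  */
  while (src != final_src)
    {
      memcpy (dst, src, bytes_per_iter);
      src += bytes_per_iter;
      dst += bytes_per_iter;
    }

  /* Mop up any left-over bytes with a straight copy.  */
  if (leftover)
    memcpy (dst, src, leftover);
}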
15485
15486 /* Emit a block move when either the source or destination is unaligned (not
15487 aligned to a four-byte boundary). This may need further tuning depending on
15488 core type, optimize_size setting, etc. */
15489
15490 static int
15491 arm_cpymemqi_unaligned (rtx *operands)
15492 {
15493 HOST_WIDE_INT length = INTVAL (operands[2]);
15494
15495 if (optimize_size)
15496 {
15497 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15498 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15499 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15500 the size of the code when optimizing for size. We'll use ldm/stm if src_aligned
15501 or dst_aligned though: allow more interleaving in those cases since the
15502 resulting code can be smaller. */
15503 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15504 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15505
15506 if (length > 12)
15507 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15508 interleave_factor, bytes_per_iter);
15509 else
15510 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15511 interleave_factor);
15512 }
15513 else
15514 {
15515 /* Note that the loop created by arm_block_move_unaligned_loop may be
15516 subject to loop unrolling, which makes tuning this condition a little
15517 redundant. */
15518 if (length > 32)
15519 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15520 else
15521 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15522 }
15523
15524 return 1;
15525 }
15526
15527 int
15528 arm_gen_cpymemqi (rtx *operands)
15529 {
15530 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15531 HOST_WIDE_INT srcoffset, dstoffset;
15532 rtx src, dst, srcbase, dstbase;
15533 rtx part_bytes_reg = NULL;
15534 rtx mem;
15535
15536 if (!CONST_INT_P (operands[2])
15537 || !CONST_INT_P (operands[3])
15538 || INTVAL (operands[2]) > 64)
15539 return 0;
15540
15541 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15542 return arm_cpymemqi_unaligned (operands);
15543
15544 if (INTVAL (operands[3]) & 3)
15545 return 0;
15546
15547 dstbase = operands[0];
15548 srcbase = operands[1];
15549
15550 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15551 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15552
15553 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15554 out_words_to_go = INTVAL (operands[2]) / 4;
15555 last_bytes = INTVAL (operands[2]) & 3;
15556 dstoffset = srcoffset = 0;
15557
15558 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15559 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15560
15561 while (in_words_to_go >= 2)
15562 {
15563 if (in_words_to_go > 4)
15564 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15565 TRUE, srcbase, &srcoffset));
15566 else
15567 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15568 src, FALSE, srcbase,
15569 &srcoffset));
15570
15571 if (out_words_to_go)
15572 {
15573 if (out_words_to_go > 4)
15574 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15575 TRUE, dstbase, &dstoffset));
15576 else if (out_words_to_go != 1)
15577 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15578 out_words_to_go, dst,
15579 (last_bytes == 0
15580 ? FALSE : TRUE),
15581 dstbase, &dstoffset));
15582 else
15583 {
15584 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15585 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15586 if (last_bytes != 0)
15587 {
15588 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15589 dstoffset += 4;
15590 }
15591 }
15592 }
15593
15594 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15595 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15596 }
15597
15598 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15599 if (out_words_to_go)
15600 {
15601 rtx sreg;
15602
15603 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15604 sreg = copy_to_reg (mem);
15605
15606 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15607 emit_move_insn (mem, sreg);
15608 in_words_to_go--;
15609
15610 gcc_assert (!in_words_to_go); /* Sanity check */
15611 }
15612
15613 if (in_words_to_go)
15614 {
15615 gcc_assert (in_words_to_go > 0);
15616
15617 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15618 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15619 }
15620
15621 gcc_assert (!last_bytes || part_bytes_reg);
15622
15623 if (BYTES_BIG_ENDIAN && last_bytes)
15624 {
15625 rtx tmp = gen_reg_rtx (SImode);
15626
15627 /* The bytes we want are in the top end of the word. */
15628 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15629 GEN_INT (8 * (4 - last_bytes))));
15630 part_bytes_reg = tmp;
15631
15632 while (last_bytes)
15633 {
15634 mem = adjust_automodify_address (dstbase, QImode,
15635 plus_constant (Pmode, dst,
15636 last_bytes - 1),
15637 dstoffset + last_bytes - 1);
15638 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15639
15640 if (--last_bytes)
15641 {
15642 tmp = gen_reg_rtx (SImode);
15643 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15644 part_bytes_reg = tmp;
15645 }
15646 }
15647
15648 }
15649 else
15650 {
15651 if (last_bytes > 1)
15652 {
15653 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15654 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15655 last_bytes -= 2;
15656 if (last_bytes)
15657 {
15658 rtx tmp = gen_reg_rtx (SImode);
15659 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15660 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15661 part_bytes_reg = tmp;
15662 dstoffset += 2;
15663 }
15664 }
15665
15666 if (last_bytes)
15667 {
15668 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15669 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15670 }
15671 }
15672
15673 return 1;
15674 }
15675
15676 /* Helper for gen_cpymem_ldrd_strd. Increase the address of the memory rtx
15677 by its mode size. */
15678 inline static rtx
15679 next_consecutive_mem (rtx mem)
15680 {
15681 machine_mode mode = GET_MODE (mem);
15682 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15683 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15684
15685 return adjust_automodify_address (mem, mode, addr, offset);
15686 }
15687
15688 /* Copy using LDRD/STRD instructions whenever possible.
15689 Returns true upon success. */
15690 bool
15691 gen_cpymem_ldrd_strd (rtx *operands)
15692 {
15693 unsigned HOST_WIDE_INT len;
15694 HOST_WIDE_INT align;
15695 rtx src, dst, base;
15696 rtx reg0;
15697 bool src_aligned, dst_aligned;
15698 bool src_volatile, dst_volatile;
15699
15700 gcc_assert (CONST_INT_P (operands[2]));
15701 gcc_assert (CONST_INT_P (operands[3]));
15702
15703 len = UINTVAL (operands[2]);
15704 if (len > 64)
15705 return false;
15706
15707 /* Maximum alignment we can assume for both src and dst buffers. */
15708 align = INTVAL (operands[3]);
15709
15710 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15711 return false;
15712
15713 /* Place src and dst addresses in registers
15714 and update the corresponding mem rtx. */
15715 dst = operands[0];
15716 dst_volatile = MEM_VOLATILE_P (dst);
15717 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15718 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15719 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15720
15721 src = operands[1];
15722 src_volatile = MEM_VOLATILE_P (src);
15723 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15724 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15725 src = adjust_automodify_address (src, VOIDmode, base, 0);
15726
15727 if (!unaligned_access && !(src_aligned && dst_aligned))
15728 return false;
15729
15730 if (src_volatile || dst_volatile)
15731 return false;
15732
15733 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15734 if (!(dst_aligned || src_aligned))
15735 return arm_gen_cpymemqi (operands);
15736
15737 /* If either the src or dst is unaligned, we'll be accessing it as pairs
15738 of unaligned SImode accesses. Otherwise we can generate DImode
15739 ldrd/strd instructions. */
15740 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15741 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15742
15743 while (len >= 8)
15744 {
15745 len -= 8;
15746 reg0 = gen_reg_rtx (DImode);
15747 rtx first_reg = NULL_RTX;
15748 rtx second_reg = NULL_RTX;
15749
15750 if (!src_aligned || !dst_aligned)
15751 {
15752 if (BYTES_BIG_ENDIAN)
15753 {
15754 second_reg = gen_lowpart (SImode, reg0);
15755 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15756 }
15757 else
15758 {
15759 first_reg = gen_lowpart (SImode, reg0);
15760 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15761 }
15762 }
15763 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15764 emit_move_insn (reg0, src);
15765 else if (src_aligned)
15766 emit_insn (gen_unaligned_loaddi (reg0, src));
15767 else
15768 {
15769 emit_insn (gen_unaligned_loadsi (first_reg, src));
15770 src = next_consecutive_mem (src);
15771 emit_insn (gen_unaligned_loadsi (second_reg, src));
15772 }
15773
15774 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15775 emit_move_insn (dst, reg0);
15776 else if (dst_aligned)
15777 emit_insn (gen_unaligned_storedi (dst, reg0));
15778 else
15779 {
15780 emit_insn (gen_unaligned_storesi (dst, first_reg));
15781 dst = next_consecutive_mem (dst);
15782 emit_insn (gen_unaligned_storesi (dst, second_reg));
15783 }
15784
15785 src = next_consecutive_mem (src);
15786 dst = next_consecutive_mem (dst);
15787 }
15788
15789 gcc_assert (len < 8);
15790 if (len >= 4)
15791 {
15792 /* More than a word but less than a double-word to copy. Copy a word. */
15793 reg0 = gen_reg_rtx (SImode);
15794 src = adjust_address (src, SImode, 0);
15795 dst = adjust_address (dst, SImode, 0);
15796 if (src_aligned)
15797 emit_move_insn (reg0, src);
15798 else
15799 emit_insn (gen_unaligned_loadsi (reg0, src));
15800
15801 if (dst_aligned)
15802 emit_move_insn (dst, reg0);
15803 else
15804 emit_insn (gen_unaligned_storesi (dst, reg0));
15805
15806 src = next_consecutive_mem (src);
15807 dst = next_consecutive_mem (dst);
15808 len -= 4;
15809 }
15810
15811 if (len == 0)
15812 return true;
15813
15814 /* Copy the remaining bytes. */
15815 if (len >= 2)
15816 {
15817 dst = adjust_address (dst, HImode, 0);
15818 src = adjust_address (src, HImode, 0);
15819 reg0 = gen_reg_rtx (SImode);
15820 if (src_aligned)
15821 emit_insn (gen_zero_extendhisi2 (reg0, src));
15822 else
15823 emit_insn (gen_unaligned_loadhiu (reg0, src));
15824
15825 if (dst_aligned)
15826 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15827 else
15828 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15829
15830 src = next_consecutive_mem (src);
15831 dst = next_consecutive_mem (dst);
15832 if (len == 2)
15833 return true;
15834 }
15835
15836 dst = adjust_address (dst, QImode, 0);
15837 src = adjust_address (src, QImode, 0);
15838 reg0 = gen_reg_rtx (QImode);
15839 emit_move_insn (reg0, src);
15840 emit_move_insn (dst, reg0);
15841 return true;
15842 }
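
The chunking strategy of the function above reduces to the following C-level sketch: 8-byte chunks while possible, then at most one 4-byte, one 2-byte and one 1-byte copy (illustration only; the real code chooses ldrd/strd, ldm/stm or unaligned SImode pairs for each chunk).

#include <cstring>
#include <cstddef>

static void
cpymem_chunk_sketch (unsigned char *dst, const unsigned char *src, size_t len)
{
  while (len >= 8) { memcpy (dst, src, 8); dst += 8; src += 8; len -= 8; }
  if (len >= 4)    { memcpy (dst, src, 4); dst += 4; src += 4; len -= 4; }
  if (len >= 2)    { memcpy (dst, src, 2); dst += 2; src += 2; len -= 2; }
  if (len)         memcpy (dst, src, 1);
}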
15843
15844 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15845 into their component 32-bit subregs. OP2 may be an immediate
15846 constant and we want to simplify it in that case. */
15847 void
15848 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15849 rtx *lo_op2, rtx *hi_op2)
15850 {
15851 *lo_op1 = gen_lowpart (SImode, op1);
15852 *hi_op1 = gen_highpart (SImode, op1);
15853 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15854 subreg_lowpart_offset (SImode, DImode));
15855 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15856 subreg_highpart_offset (SImode, DImode));
15857 }
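
A hedged usage sketch, assuming it is compiled inside this backend: decomposing the register pair r0:r1 and an immediate. For a DImode constant such as 0x100000003, the low subreg simplifies to 3 and the high subreg to 1 on a little-endian target; decompose_example is a hypothetical illustration, not part of arm.cc.

/* Hypothetical example only.  */
static void
decompose_example (void)
{
  rtx op1 = gen_rtx_REG (DImode, 0);            /* r0:r1 as one DImode value.  */
  rtx op2 = GEN_INT (HOST_WIDE_INT_C (0x100000003));
  rtx lo1, hi1, lo2, hi2;

  arm_decompose_di_binop (op1, op2, &lo1, &hi1, &lo2, &hi2);
  /* lo2 and hi2 are now plain CONST_INTs; lo1 and hi1 are SImode regs.  */
}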
15858
15859 /* Select a dominance comparison mode if possible for a test of the general
15860 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15861 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15862 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15863 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15864 In all cases OP will be either EQ or NE, but we don't need to know which
15865 here. If we are unable to support a dominance comparison we return
15866 CC mode. This will then fail to match for the RTL expressions that
15867 generate this call. */
15868 machine_mode
15869 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15870 {
15871 enum rtx_code cond1, cond2;
15872 int swapped = 0;
15873
15874 /* Currently we will probably get the wrong result if the individual
15875 comparisons are not simple. This also ensures that it is safe to
15876 reverse a comparison if necessary. */
15877 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15878 != CCmode)
15879 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15880 != CCmode))
15881 return CCmode;
15882
15883 /* The if_then_else variant of this tests the second condition if the
15884 first passes, but is true if the first fails. Reverse the first
15885 condition to get a true "inclusive-or" expression. */
15886 if (cond_or == DOM_CC_NX_OR_Y)
15887 cond1 = reverse_condition (cond1);
15888
15889 /* If the comparisons are not equal, and one doesn't dominate the other,
15890 then we can't do this. */
15891 if (cond1 != cond2
15892 && !comparison_dominates_p (cond1, cond2)
15893 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15894 return CCmode;
15895
15896 if (swapped)
15897 std::swap (cond1, cond2);
15898
15899 switch (cond1)
15900 {
15901 case EQ:
15902 if (cond_or == DOM_CC_X_AND_Y)
15903 return CC_DEQmode;
15904
15905 switch (cond2)
15906 {
15907 case EQ: return CC_DEQmode;
15908 case LE: return CC_DLEmode;
15909 case LEU: return CC_DLEUmode;
15910 case GE: return CC_DGEmode;
15911 case GEU: return CC_DGEUmode;
15912 default: gcc_unreachable ();
15913 }
15914
15915 case LT:
15916 if (cond_or == DOM_CC_X_AND_Y)
15917 return CC_DLTmode;
15918
15919 switch (cond2)
15920 {
15921 case LT:
15922 return CC_DLTmode;
15923 case LE:
15924 return CC_DLEmode;
15925 case NE:
15926 return CC_DNEmode;
15927 default:
15928 gcc_unreachable ();
15929 }
15930
15931 case GT:
15932 if (cond_or == DOM_CC_X_AND_Y)
15933 return CC_DGTmode;
15934
15935 switch (cond2)
15936 {
15937 case GT:
15938 return CC_DGTmode;
15939 case GE:
15940 return CC_DGEmode;
15941 case NE:
15942 return CC_DNEmode;
15943 default:
15944 gcc_unreachable ();
15945 }
15946
15947 case LTU:
15948 if (cond_or == DOM_CC_X_AND_Y)
15949 return CC_DLTUmode;
15950
15951 switch (cond2)
15952 {
15953 case LTU:
15954 return CC_DLTUmode;
15955 case LEU:
15956 return CC_DLEUmode;
15957 case NE:
15958 return CC_DNEmode;
15959 default:
15960 gcc_unreachable ();
15961 }
15962
15963 case GTU:
15964 if (cond_or == DOM_CC_X_AND_Y)
15965 return CC_DGTUmode;
15966
15967 switch (cond2)
15968 {
15969 case GTU:
15970 return CC_DGTUmode;
15971 case GEU:
15972 return CC_DGEUmode;
15973 case NE:
15974 return CC_DNEmode;
15975 default:
15976 gcc_unreachable ();
15977 }
15978
15979 /* The remaining cases only occur when both comparisons are the
15980 same. */
15981 case NE:
15982 gcc_assert (cond1 == cond2);
15983 return CC_DNEmode;
15984
15985 case LE:
15986 gcc_assert (cond1 == cond2);
15987 return CC_DLEmode;
15988
15989 case GE:
15990 gcc_assert (cond1 == cond2);
15991 return CC_DGEmode;
15992
15993 case LEU:
15994 gcc_assert (cond1 == cond2);
15995 return CC_DLEUmode;
15996
15997 case GEU:
15998 gcc_assert (cond1 == cond2);
15999 return CC_DGEUmode;
16000
16001 default:
16002 gcc_unreachable ();
16003 }
16004 }
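
/* Worked example (a sketch of the intent, not literal compiler output;
   a, b, c, d are illustrative operands): for a source condition such as
   (a == b) || (c <= d), cond1 is EQ and cond2 is LE; EQ dominates LE,
   so the pair is given CC_DLEmode and can be matched as a
   conditional-compare sequence along the lines of

        cmp     a, b
        cmpne   c, d            @ second compare only if the first failed
        ble     target          @ LE also holds when the first compare was EQ
*/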
16005
16006 machine_mode
16007 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16008 {
16009 /* All floating point compares return CCFP if it is an equality
16010 comparison, and CCFPE otherwise. */
16011 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16012 {
16013 switch (op)
16014 {
16015 case EQ:
16016 case NE:
16017 case UNORDERED:
16018 case ORDERED:
16019 case UNLT:
16020 case UNLE:
16021 case UNGT:
16022 case UNGE:
16023 case UNEQ:
16024 case LTGT:
16025 return CCFPmode;
16026
16027 case LT:
16028 case LE:
16029 case GT:
16030 case GE:
16031 return CCFPEmode;
16032
16033 default:
16034 gcc_unreachable ();
16035 }
16036 }
16037
16038 /* A compare with a shifted operand. Because of canonicalization, the
16039 comparison will have to be swapped when we emit the assembler. */
16040 if (GET_MODE (y) == SImode
16041 && (REG_P (y) || (SUBREG_P (y)))
16042 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16043 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16044 || GET_CODE (x) == ROTATERT))
16045 return CC_SWPmode;
16046
16047 /* A widened compare of the sum of a value plus a carry against a
16048 constant. This is a representation of RSC. We want to swap the
16049 result of the comparison at output. Not valid if the Z bit is
16050 needed. */
16051 if (GET_MODE (x) == DImode
16052 && GET_CODE (x) == PLUS
16053 && arm_borrow_operation (XEXP (x, 1), DImode)
16054 && CONST_INT_P (y)
16055 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16056 && (op == LE || op == GT))
16057 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16058 && (op == LEU || op == GTU))))
16059 return CC_SWPmode;
16060
16061 /* If X is a constant we want to use CC_RSBmode. This is
16062 non-canonical, but arm_gen_compare_reg uses this to generate the
16063 correct canonical form. */
16064 if (GET_MODE (y) == SImode
16065 && (REG_P (y) || SUBREG_P (y))
16066 && CONST_INT_P (x))
16067 return CC_RSBmode;
16068
16069 /* This operation is performed swapped, but since we only rely on the Z
16070 flag we don't need an additional mode. */
16071 if (GET_MODE (y) == SImode
16072 && (REG_P (y) || (SUBREG_P (y)))
16073 && GET_CODE (x) == NEG
16074 && (op == EQ || op == NE))
16075 return CC_Zmode;
16076
16077 /* This is a special case that is used by combine to allow a
16078 comparison of a shifted byte load to be split into a zero-extend
16079 followed by a comparison of the shifted integer (only valid for
16080 equalities and unsigned inequalities). */
16081 if (GET_MODE (x) == SImode
16082 && GET_CODE (x) == ASHIFT
16083 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16084 && GET_CODE (XEXP (x, 0)) == SUBREG
16085 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16086 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16087 && (op == EQ || op == NE
16088 || op == GEU || op == GTU || op == LTU || op == LEU)
16089 && CONST_INT_P (y))
16090 return CC_Zmode;
16091
16092 /* A construct for a conditional compare, if the false arm contains
16093 0, then both conditions must be true, otherwise either condition
16094 must be true. Not all conditions are possible, so CCmode is
16095 returned if it can't be done. */
16096 if (GET_CODE (x) == IF_THEN_ELSE
16097 && (XEXP (x, 2) == const0_rtx
16098 || XEXP (x, 2) == const1_rtx)
16099 && COMPARISON_P (XEXP (x, 0))
16100 && COMPARISON_P (XEXP (x, 1)))
16101 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16102 INTVAL (XEXP (x, 2)));
16103
16104 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16105 if (GET_CODE (x) == AND
16106 && (op == EQ || op == NE)
16107 && COMPARISON_P (XEXP (x, 0))
16108 && COMPARISON_P (XEXP (x, 1)))
16109 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16110 DOM_CC_X_AND_Y);
16111
16112 if (GET_CODE (x) == IOR
16113 && (op == EQ || op == NE)
16114 && COMPARISON_P (XEXP (x, 0))
16115 && COMPARISON_P (XEXP (x, 1)))
16116 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16117 DOM_CC_X_OR_Y);
16118
16119 /* An operation (on Thumb) where we want to test for a single bit.
16120 This is done by shifting that bit up into the top bit of a
16121 scratch register; we can then branch on the sign bit. */
16122 if (TARGET_THUMB1
16123 && GET_MODE (x) == SImode
16124 && (op == EQ || op == NE)
16125 && GET_CODE (x) == ZERO_EXTRACT
16126 && XEXP (x, 1) == const1_rtx)
16127 return CC_Nmode;
16128
16129 /* An operation that sets the condition codes as a side-effect, the
16130 V flag is not set correctly, so we can only use comparisons where
16131 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16132 instead.) */
16133 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16134 if (GET_MODE (x) == SImode
16135 && y == const0_rtx
16136 && (op == EQ || op == NE || op == LT || op == GE)
16137 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16138 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16139 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16140 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16141 || GET_CODE (x) == LSHIFTRT
16142 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16143 || GET_CODE (x) == ROTATERT
16144 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16145 return CC_NZmode;
16146
16147 /* A comparison of ~reg with a const is really a special
16148 canonicalization of compare (~const, reg), which is a reverse
16149 subtract operation. We may not get here if CONST is 0, but that
16150 doesn't matter because ~0 isn't a valid immediate for RSB. */
16151 if (GET_MODE (x) == SImode
16152 && GET_CODE (x) == NOT
16153 && CONST_INT_P (y))
16154 return CC_RSBmode;
16155
16156 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16157 return CC_Zmode;
16158
16159 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16160 && GET_CODE (x) == PLUS
16161 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16162 return CC_Cmode;
16163
16164 if (GET_MODE (x) == DImode
16165 && GET_CODE (x) == PLUS
16166 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16167 && CONST_INT_P (y)
16168 && UINTVAL (y) == 0x800000000
16169 && (op == GEU || op == LTU))
16170 return CC_ADCmode;
16171
16172 if (GET_MODE (x) == DImode
16173 && (op == GE || op == LT)
16174 && GET_CODE (x) == SIGN_EXTEND
16175 && ((GET_CODE (y) == PLUS
16176 && arm_borrow_operation (XEXP (y, 0), DImode))
16177 || arm_borrow_operation (y, DImode)))
16178 return CC_NVmode;
16179
16180 if (GET_MODE (x) == DImode
16181 && (op == GEU || op == LTU)
16182 && GET_CODE (x) == ZERO_EXTEND
16183 && ((GET_CODE (y) == PLUS
16184 && arm_borrow_operation (XEXP (y, 0), DImode))
16185 || arm_borrow_operation (y, DImode)))
16186 return CC_Bmode;
16187
16188 if (GET_MODE (x) == DImode
16189 && (op == EQ || op == NE)
16190 && (GET_CODE (x) == PLUS
16191 || GET_CODE (x) == MINUS)
16192 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16193 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16194 && GET_CODE (y) == SIGN_EXTEND
16195 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16196 return CC_Vmode;
16197
16198 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16199 return GET_MODE (x);
16200
16201 return CCmode;
16202 }
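
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the CC_Cmode case above: it matches the usual C
   idiom for detecting carry out of an unsigned addition, where the sum
   is compared against one of the addends,

     static inline int
     add_carries_u32 (unsigned int a, unsigned int b)
     {
       return (a + b) < a;      // LTU compare of (plus a b) against a
     }

   which allows the comparison to reuse the carry flag set by the
   addition instead of performing a separate compare.  */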
16203
16204 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16205 the sequence of instructions needed to generate a suitable condition
16206 code register. Return the CC register result. */
16207 static rtx
16208 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16209 {
16210 machine_mode mode;
16211 rtx cc_reg;
16212
16213 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16214 gcc_assert (TARGET_32BIT);
16215 gcc_assert (!CONST_INT_P (x));
16216
16217 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16218 subreg_lowpart_offset (SImode, DImode));
16219 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16220 subreg_highpart_offset (SImode, DImode));
16221 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16222 subreg_lowpart_offset (SImode, DImode));
16223 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16224 subreg_highpart_offset (SImode, DImode));
16225 switch (code)
16226 {
16227 case EQ:
16228 case NE:
16229 {
16230 if (y_lo == const0_rtx || y_hi == const0_rtx)
16231 {
16232 if (y_lo != const0_rtx)
16233 {
16234 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16235
16236 gcc_assert (y_hi == const0_rtx);
16237 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16238 if (!arm_add_operand (y_lo, SImode))
16239 y_lo = force_reg (SImode, y_lo);
16240 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16241 x_lo = scratch2;
16242 }
16243 else if (y_hi != const0_rtx)
16244 {
16245 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16246
16247 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16248 if (!arm_add_operand (y_hi, SImode))
16249 y_hi = force_reg (SImode, y_hi);
16250 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16251 x_hi = scratch2;
16252 }
16253
16254 if (!scratch)
16255 {
16256 gcc_assert (!reload_completed);
16257 scratch = gen_rtx_SCRATCH (SImode);
16258 }
16259
16260 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16261 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16262
16263 rtx set
16264 = gen_rtx_SET (cc_reg,
16265 gen_rtx_COMPARE (CC_NZmode,
16266 gen_rtx_IOR (SImode, x_lo, x_hi),
16267 const0_rtx));
16268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16269 clobber)));
16270 return cc_reg;
16271 }
16272
16273 if (!arm_add_operand (y_lo, SImode))
16274 y_lo = force_reg (SImode, y_lo);
16275
16276 if (!arm_add_operand (y_hi, SImode))
16277 y_hi = force_reg (SImode, y_hi);
16278
16279 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16280 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16281 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16282 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16283 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16284
16285 emit_insn (gen_rtx_SET (cc_reg,
16286 gen_rtx_COMPARE (mode, conjunction,
16287 const0_rtx)));
16288 return cc_reg;
16289 }
16290
16291 case LT:
16292 case GE:
16293 {
16294 if (y_lo == const0_rtx)
16295 {
16296 /* If the low word of y is 0, then this is simply a normal
16297 compare of the upper words. */
16298 if (!arm_add_operand (y_hi, SImode))
16299 y_hi = force_reg (SImode, y_hi);
16300
16301 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16302 }
16303
16304 if (!arm_add_operand (y_lo, SImode))
16305 y_lo = force_reg (SImode, y_lo);
16306
16307 rtx cmp1
16308 = gen_rtx_LTU (DImode,
16309 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16310 const0_rtx);
16311
16312 if (!scratch)
16313 scratch = gen_rtx_SCRATCH (SImode);
16314
16315 if (!arm_not_operand (y_hi, SImode))
16316 y_hi = force_reg (SImode, y_hi);
16317
16318 rtx_insn *insn;
16319 if (y_hi == const0_rtx)
16320 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16321 cmp1));
16322 else if (CONST_INT_P (y_hi))
16323 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16324 y_hi, cmp1));
16325 else
16326 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16327 cmp1));
16328 return SET_DEST (single_set (insn));
16329 }
16330
16331 case LE:
16332 case GT:
16333 {
16334 /* During expansion, we only expect to get here if y is a
16335 constant that we want to handle, otherwise we should have
16336 swapped the operands already. */
16337 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16338
16339 if (!const_ok_for_arm (INTVAL (y_lo)))
16340 y_lo = force_reg (SImode, y_lo);
16341
16342 /* Perform a reverse subtract and compare. */
16343 rtx cmp1
16344 = gen_rtx_LTU (DImode,
16345 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16346 const0_rtx);
16347 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16348 x_hi, cmp1));
16349 return SET_DEST (single_set (insn));
16350 }
16351
16352 case LTU:
16353 case GEU:
16354 {
16355 if (y_lo == const0_rtx)
16356 {
16357 /* If the low word of y is 0, then this is simply a normal
16358 compare of the upper words. */
16359 if (!arm_add_operand (y_hi, SImode))
16360 y_hi = force_reg (SImode, y_hi);
16361
16362 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16363 }
16364
16365 if (!arm_add_operand (y_lo, SImode))
16366 y_lo = force_reg (SImode, y_lo);
16367
16368 rtx cmp1
16369 = gen_rtx_LTU (DImode,
16370 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16371 const0_rtx);
16372
16373 if (!scratch)
16374 scratch = gen_rtx_SCRATCH (SImode);
16375 if (!arm_not_operand (y_hi, SImode))
16376 y_hi = force_reg (SImode, y_hi);
16377
16378 rtx_insn *insn;
16379 if (y_hi == const0_rtx)
16380 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16381 cmp1));
16382 else if (CONST_INT_P (y_hi))
16383 {
16384 /* Constant is viewed as unsigned when zero-extended. */
16385 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16386 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16387 y_hi, cmp1));
16388 }
16389 else
16390 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16391 cmp1));
16392 return SET_DEST (single_set (insn));
16393 }
16394
16395 case LEU:
16396 case GTU:
16397 {
16398 /* During expansion, we only expect to get here if y is a
16399 constant that we want to handle, otherwise we should have
16400 swapped the operands already. */
16401 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16402
16403 if (!const_ok_for_arm (INTVAL (y_lo)))
16404 y_lo = force_reg (SImode, y_lo);
16405
16406 /* Perform a reverse subtract and compare. */
16407 rtx cmp1
16408 = gen_rtx_LTU (DImode,
16409 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16410 const0_rtx);
16411 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16412 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16413 x_hi, cmp1));
16414 return SET_DEST (single_set (insn));
16415 }
16416
16417 default:
16418 gcc_unreachable ();
16419 }
16420 }
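
/* Sketches of the intent (not literal compiler output; register names
   are illustrative): a 64-bit equality test against zero only needs the
   Z flag, so the two halves are simply ORed together,

        orrs    tmp, x_lo, x_hi         @ Z set iff both halves are zero
        beq     target

   while a signed 64-bit test x < y chains the high words through the
   borrow produced by comparing the low words,

        cmp     x_lo, y_lo
        sbcs    tmp, x_hi, y_hi         @ N and V reflect the 64-bit result
        blt     target
*/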
16421
16422 /* X and Y are two things to compare using CODE. Emit the compare insn and
16423 return the rtx for register 0 in the proper mode. */
16424 rtx
16425 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16426 {
16427 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16428 return arm_gen_dicompare_reg (code, x, y, scratch);
16429
16430 machine_mode mode = SELECT_CC_MODE (code, x, y);
16431 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16432 if (mode == CC_RSBmode)
16433 {
16434 if (!scratch)
16435 scratch = gen_rtx_SCRATCH (SImode);
16436 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16437 GEN_INT (~UINTVAL (x)), y));
16438 }
16439 else
16440 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16441
16442 return cc_reg;
16443 }
16444
16445 /* Generate a sequence of insns that will generate the correct return
16446 address mask depending on the physical architecture that the program
16447 is running on. */
16448 rtx
16449 arm_gen_return_addr_mask (void)
16450 {
16451 rtx reg = gen_reg_rtx (Pmode);
16452
16453 emit_insn (gen_return_addr_mask (reg));
16454 return reg;
16455 }
16456
16457 void
16458 arm_reload_in_hi (rtx *operands)
16459 {
16460 rtx ref = operands[1];
16461 rtx base, scratch;
16462 HOST_WIDE_INT offset = 0;
16463
16464 if (SUBREG_P (ref))
16465 {
16466 offset = SUBREG_BYTE (ref);
16467 ref = SUBREG_REG (ref);
16468 }
16469
16470 if (REG_P (ref))
16471 {
16472 /* We have a pseudo which has been spilt onto the stack; there
16473 are two cases here: the first where there is a simple
16474 stack-slot replacement and a second where the stack-slot is
16475 out of range, or is used as a subreg. */
16476 if (reg_equiv_mem (REGNO (ref)))
16477 {
16478 ref = reg_equiv_mem (REGNO (ref));
16479 base = find_replacement (&XEXP (ref, 0));
16480 }
16481 else
16482 /* The slot is out of range, or was dressed up in a SUBREG. */
16483 base = reg_equiv_address (REGNO (ref));
16484
16485 /* PR 62554: If there is no equivalent memory location then just move
16486 the value as an SImode register move. This happens when the target
16487 architecture variant does not have an HImode register move. */
16488 if (base == NULL)
16489 {
16490 gcc_assert (REG_P (operands[0]));
16491 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16492 gen_rtx_SUBREG (SImode, ref, 0)));
16493 return;
16494 }
16495 }
16496 else
16497 base = find_replacement (&XEXP (ref, 0));
16498
16499 /* Handle the case where the address is too complex to be offset by 1. */
16500 if (GET_CODE (base) == MINUS
16501 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16502 {
16503 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16504
16505 emit_set_insn (base_plus, base);
16506 base = base_plus;
16507 }
16508 else if (GET_CODE (base) == PLUS)
16509 {
16510 /* The addend must be CONST_INT, or we would have dealt with it above. */
16511 HOST_WIDE_INT hi, lo;
16512
16513 offset += INTVAL (XEXP (base, 1));
16514 base = XEXP (base, 0);
16515
16516 /* Rework the address into a legal sequence of insns. */
16517 /* Valid range for lo is -4095 -> 4095 */
16518 lo = (offset >= 0
16519 ? (offset & 0xfff)
16520 : -((-offset) & 0xfff));
16521
16522 /* Corner case, if lo is the max offset then we would be out of range
16523 once we have added the additional 1 below, so bump the msb into the
16524 pre-loading insn(s). */
16525 if (lo == 4095)
16526 lo &= 0x7ff;
16527
16528 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16529 ^ (HOST_WIDE_INT) 0x80000000)
16530 - (HOST_WIDE_INT) 0x80000000);
16531
16532 gcc_assert (hi + lo == offset);
16533
16534 if (hi != 0)
16535 {
16536 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16537
16538 /* Get the base address; addsi3 knows how to handle constants
16539 that require more than one insn. */
16540 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16541 base = base_plus;
16542 offset = lo;
16543 }
16544 }
16545
16546 /* Operands[2] may overlap operands[0] (though it won't overlap
16547 operands[1]), that's why we asked for a DImode reg -- so we can
16548 use the bit that does not overlap. */
16549 if (REGNO (operands[2]) == REGNO (operands[0]))
16550 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16551 else
16552 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16553
16554 emit_insn (gen_zero_extendqisi2 (scratch,
16555 gen_rtx_MEM (QImode,
16556 plus_constant (Pmode, base,
16557 offset))));
16558 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16559 gen_rtx_MEM (QImode,
16560 plus_constant (Pmode, base,
16561 offset + 1))));
16562 if (!BYTES_BIG_ENDIAN)
16563 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16564 gen_rtx_IOR (SImode,
16565 gen_rtx_ASHIFT
16566 (SImode,
16567 gen_rtx_SUBREG (SImode, operands[0], 0),
16568 GEN_INT (8)),
16569 scratch));
16570 else
16571 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16572 gen_rtx_IOR (SImode,
16573 gen_rtx_ASHIFT (SImode, scratch,
16574 GEN_INT (8)),
16575 gen_rtx_SUBREG (SImode, operands[0], 0)));
16576 }
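
/* A sketch of the sequence synthesized above for the little-endian case
   (not literal compiler output; register names are illustrative):

        ldrb    scratch, [base, #offset]        @ low byte
        ldrb    rd, [base, #offset + 1]         @ high byte
        orr     rd, scratch, rd, lsl #8

   For big-endian the roles of the two bytes are swapped, so it is the
   scratch register that ends up shifted left by 8.  */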
16577
16578 /* Handle storing a half-word to memory during reload by synthesizing as two
16579 byte stores. Take care not to clobber the input values until after we
16580 have moved them somewhere safe. This code assumes that if the DImode
16581 scratch in operands[2] overlaps either the input value or output address
16582 in some way, then that value must die in this insn (we absolutely need
16583 two scratch registers for some corner cases). */
16584 void
16585 arm_reload_out_hi (rtx *operands)
16586 {
16587 rtx ref = operands[0];
16588 rtx outval = operands[1];
16589 rtx base, scratch;
16590 HOST_WIDE_INT offset = 0;
16591
16592 if (SUBREG_P (ref))
16593 {
16594 offset = SUBREG_BYTE (ref);
16595 ref = SUBREG_REG (ref);
16596 }
16597
16598 if (REG_P (ref))
16599 {
16600 /* We have a pseudo which has been spilt onto the stack; there
16601 are two cases here: the first where there is a simple
16602 stack-slot replacement and a second where the stack-slot is
16603 out of range, or is used as a subreg. */
16604 if (reg_equiv_mem (REGNO (ref)))
16605 {
16606 ref = reg_equiv_mem (REGNO (ref));
16607 base = find_replacement (&XEXP (ref, 0));
16608 }
16609 else
16610 /* The slot is out of range, or was dressed up in a SUBREG. */
16611 base = reg_equiv_address (REGNO (ref));
16612
16613 /* PR 62254: If there is no equivalent memory location then just move
16614 the value as an SImode register move. This happens when the target
16615 architecture variant does not have an HImode register move. */
16616 if (base == NULL)
16617 {
16618 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16619
16620 if (REG_P (outval))
16621 {
16622 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16623 gen_rtx_SUBREG (SImode, outval, 0)));
16624 }
16625 else /* SUBREG_P (outval) */
16626 {
16627 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16628 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16629 SUBREG_REG (outval)));
16630 else
16631 /* FIXME: Handle other cases ? */
16632 gcc_unreachable ();
16633 }
16634 return;
16635 }
16636 }
16637 else
16638 base = find_replacement (&XEXP (ref, 0));
16639
16640 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16641
16642 /* Handle the case where the address is too complex to be offset by 1. */
16643 if (GET_CODE (base) == MINUS
16644 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16645 {
16646 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16647
16648 /* Be careful not to destroy OUTVAL. */
16649 if (reg_overlap_mentioned_p (base_plus, outval))
16650 {
16651 /* Updating base_plus might destroy outval, see if we can
16652 swap the scratch and base_plus. */
16653 if (!reg_overlap_mentioned_p (scratch, outval))
16654 std::swap (scratch, base_plus);
16655 else
16656 {
16657 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16658
16659 /* Be conservative and copy OUTVAL into the scratch now,
16660 this should only be necessary if outval is a subreg
16661 of something larger than a word. */
16662 /* XXX Might this clobber base? I can't see how it can,
16663 since scratch is known to overlap with OUTVAL, and
16664 must be wider than a word. */
16665 emit_insn (gen_movhi (scratch_hi, outval));
16666 outval = scratch_hi;
16667 }
16668 }
16669
16670 emit_set_insn (base_plus, base);
16671 base = base_plus;
16672 }
16673 else if (GET_CODE (base) == PLUS)
16674 {
16675 /* The addend must be CONST_INT, or we would have dealt with it above. */
16676 HOST_WIDE_INT hi, lo;
16677
16678 offset += INTVAL (XEXP (base, 1));
16679 base = XEXP (base, 0);
16680
16681 /* Rework the address into a legal sequence of insns. */
16682 /* Valid range for lo is -4095 -> 4095 */
16683 lo = (offset >= 0
16684 ? (offset & 0xfff)
16685 : -((-offset) & 0xfff));
16686
16687 /* Corner case, if lo is the max offset then we would be out of range
16688 once we have added the additional 1 below, so bump the msb into the
16689 pre-loading insn(s). */
16690 if (lo == 4095)
16691 lo &= 0x7ff;
16692
16693 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16694 ^ (HOST_WIDE_INT) 0x80000000)
16695 - (HOST_WIDE_INT) 0x80000000);
16696
16697 gcc_assert (hi + lo == offset);
16698
16699 if (hi != 0)
16700 {
16701 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16702
16703 /* Be careful not to destroy OUTVAL. */
16704 if (reg_overlap_mentioned_p (base_plus, outval))
16705 {
16706 /* Updating base_plus might destroy outval, see if we
16707 can swap the scratch and base_plus. */
16708 if (!reg_overlap_mentioned_p (scratch, outval))
16709 std::swap (scratch, base_plus);
16710 else
16711 {
16712 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16713
16714 /* Be conservative and copy outval into scratch now,
16715 this should only be necessary if outval is a
16716 subreg of something larger than a word. */
16717 /* XXX Might this clobber base? I can't see how it
16718 can, since scratch is known to overlap with
16719 outval. */
16720 emit_insn (gen_movhi (scratch_hi, outval));
16721 outval = scratch_hi;
16722 }
16723 }
16724
16725 /* Get the base address; addsi3 knows how to handle constants
16726 that require more than one insn. */
16727 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16728 base = base_plus;
16729 offset = lo;
16730 }
16731 }
16732
16733 if (BYTES_BIG_ENDIAN)
16734 {
16735 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16736 plus_constant (Pmode, base,
16737 offset + 1)),
16738 gen_lowpart (QImode, outval)));
16739 emit_insn (gen_lshrsi3 (scratch,
16740 gen_rtx_SUBREG (SImode, outval, 0),
16741 GEN_INT (8)));
16742 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16743 offset)),
16744 gen_lowpart (QImode, scratch)));
16745 }
16746 else
16747 {
16748 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16749 offset)),
16750 gen_lowpart (QImode, outval)));
16751 emit_insn (gen_lshrsi3 (scratch,
16752 gen_rtx_SUBREG (SImode, outval, 0),
16753 GEN_INT (8)));
16754 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16755 plus_constant (Pmode, base,
16756 offset + 1)),
16757 gen_lowpart (QImode, scratch)));
16758 }
16759 }
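
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the offset split used by both reload helpers above:
   the low part must fit the byte load/store immediate range
   (-4095..4095, trimmed to 0x7ff when the extra "+1" byte offset would
   push it out of range), and the remainder is folded into the base
   register with an ADD.

     static void
     split_reload_offset (long offset, long *hi, long *lo)
     {
       *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
       if (*lo == 4095)          // leave room for the extra +1
         *lo &= 0x7ff;
       *hi = offset - *lo;       // e.g. 0x1234 -> hi = 0x1000, lo = 0x234
     }
*/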
16760
16761 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16762 (padded to the size of a word) should be passed in a register. */
16763
16764 static bool
16765 arm_must_pass_in_stack (const function_arg_info &arg)
16766 {
16767 if (TARGET_AAPCS_BASED)
16768 return must_pass_in_stack_var_size (arg);
16769 else
16770 return must_pass_in_stack_var_size_or_pad (arg);
16771 }
16772
16773
16774 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16775 byte of a stack argument has useful data. For legacy APCS ABIs we use
16776 the default. For AAPCS based ABIs small aggregate types are placed
16777 in the lowest memory address. */
16778
16779 static pad_direction
16780 arm_function_arg_padding (machine_mode mode, const_tree type)
16781 {
16782 if (!TARGET_AAPCS_BASED)
16783 return default_function_arg_padding (mode, type);
16784
16785 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16786 return PAD_DOWNWARD;
16787
16788 return PAD_UPWARD;
16789 }
16790
16791
16792 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16793 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16794 register has useful data, and return the opposite if the most
16795 significant byte does. */
16796
16797 bool
16798 arm_pad_reg_upward (machine_mode mode,
16799 tree type, int first ATTRIBUTE_UNUSED)
16800 {
16801 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16802 {
16803 /* For AAPCS, small aggregates, small fixed-point types,
16804 and small complex types are always padded upwards. */
16805 if (type)
16806 {
16807 if ((AGGREGATE_TYPE_P (type)
16808 || TREE_CODE (type) == COMPLEX_TYPE
16809 || FIXED_POINT_TYPE_P (type))
16810 && int_size_in_bytes (type) <= 4)
16811 return true;
16812 }
16813 else
16814 {
16815 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16816 && GET_MODE_SIZE (mode) <= 4)
16817 return true;
16818 }
16819 }
16820
16821 /* Otherwise, use default padding. */
16822 return !BYTES_BIG_ENDIAN;
16823 }
16824
16825 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16826 assuming that the address in the base register is word aligned. */
16827 bool
16828 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16829 {
16830 HOST_WIDE_INT max_offset;
16831
16832 /* Offset must be a multiple of 4 in Thumb mode. */
16833 if (TARGET_THUMB2 && ((offset & 3) != 0))
16834 return false;
16835
16836 if (TARGET_THUMB2)
16837 max_offset = 1020;
16838 else if (TARGET_ARM)
16839 max_offset = 255;
16840 else
16841 return false;
16842
16843 return ((offset <= max_offset) && (offset >= -max_offset));
16844 }
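
/* For example (illustrative values): an offset of 1020 is accepted for
   Thumb-2 (a multiple of 4 within +/-1020) but rejected for Arm, where
   the limit is +/-255; 32 is accepted for both; 1022 is rejected for
   both, being neither a multiple of 4 nor within the Arm range.  */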
16845
16846 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16847 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16848 Assumes that the address in the base register RN is word aligned. Pattern
16849 guarantees that both memory accesses use the same base register,
16850 the offsets are constants within the range, and the gap between the offsets is 4.
16851 If reload is complete then check that registers are legal. WBACK indicates whether
16852 address is updated. LOAD indicates whether memory access is load or store. */
16853 bool
16854 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16855 bool wback, bool load)
16856 {
16857 unsigned int t, t2, n;
16858
16859 if (!reload_completed)
16860 return true;
16861
16862 if (!offset_ok_for_ldrd_strd (offset))
16863 return false;
16864
16865 t = REGNO (rt);
16866 t2 = REGNO (rt2);
16867 n = REGNO (rn);
16868
16869 if ((TARGET_THUMB2)
16870 && ((wback && (n == t || n == t2))
16871 || (t == SP_REGNUM)
16872 || (t == PC_REGNUM)
16873 || (t2 == SP_REGNUM)
16874 || (t2 == PC_REGNUM)
16875 || (!load && (n == PC_REGNUM))
16876 || (load && (t == t2))
16877 /* Triggers Cortex-M3 LDRD errata. */
16878 || (!wback && load && fix_cm3_ldrd && (n == t))))
16879 return false;
16880
16881 if ((TARGET_ARM)
16882 && ((wback && (n == t || n == t2))
16883 || (t2 == PC_REGNUM)
16884 || (t % 2 != 0) /* First destination register is not even. */
16885 || (t2 != t + 1)
16886 /* PC can be used as base register (for offset addressing only),
16887 but it is deprecated. */
16888 || (n == PC_REGNUM)))
16889 return false;
16890
16891 return true;
16892 }
16893
16894 /* Return true if a 64-bit access with alignment ALIGN and with a
16895 constant offset OFFSET from the base pointer is permitted on this
16896 architecture. */
16897 static bool
16898 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16899 {
16900 return (unaligned_access
16901 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16902 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16903 }
16904
16905 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16906 operand MEM's address contains an immediate offset from the base
16907 register and has no side effects, in which case it sets BASE,
16908 OFFSET and ALIGN accordingly. */
16909 static bool
16910 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16911 {
16912 rtx addr;
16913
16914 gcc_assert (base != NULL && offset != NULL);
16915
16916 /* TODO: Handle more general memory operand patterns, such as
16917 PRE_DEC and PRE_INC. */
16918
16919 if (side_effects_p (mem))
16920 return false;
16921
16922 /* Can't deal with subregs. */
16923 if (SUBREG_P (mem))
16924 return false;
16925
16926 gcc_assert (MEM_P (mem));
16927
16928 *offset = const0_rtx;
16929 *align = MEM_ALIGN (mem);
16930
16931 addr = XEXP (mem, 0);
16932
16933 /* If addr isn't valid for DImode, then we can't handle it. */
16934 if (!arm_legitimate_address_p (DImode, addr,
16935 reload_in_progress || reload_completed))
16936 return false;
16937
16938 if (REG_P (addr))
16939 {
16940 *base = addr;
16941 return true;
16942 }
16943 else if (GET_CODE (addr) == PLUS)
16944 {
16945 *base = XEXP (addr, 0);
16946 *offset = XEXP (addr, 1);
16947 return (REG_P (*base) && CONST_INT_P (*offset));
16948 }
16949
16950 return false;
16951 }
16952
16953 /* Called from a peephole2 to replace two word-size accesses with a
16954 single LDRD/STRD instruction. Returns true iff we can generate a
16955 new instruction sequence. That is, both accesses use the same base
16956 register and the gap between constant offsets is 4. This function
16957 may reorder its operands to match ldrd/strd RTL templates.
16958 OPERANDS are the operands found by the peephole matcher;
16959 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16960 corresponding memory operands. LOAD indicates whether the access
16961 is load or store. CONST_STORE indicates a store of constant
16962 integer values held in OPERANDS[4,5] and assumes that the pattern
16963 is four insns long, for the purpose of checking dead registers.
16964 COMMUTE indicates that register operands may be reordered. */
16965 bool
16966 gen_operands_ldrd_strd (rtx *operands, bool load,
16967 bool const_store, bool commute)
16968 {
16969 int nops = 2;
16970 HOST_WIDE_INT offsets[2], offset, align[2];
16971 rtx base = NULL_RTX;
16972 rtx cur_base, cur_offset, tmp;
16973 int i, gap;
16974 HARD_REG_SET regset;
16975
16976 gcc_assert (!const_store || !load);
16977 /* Check that the memory references are immediate offsets from the
16978 same base register. Extract the base register, the destination
16979 registers, and the corresponding memory offsets. */
16980 for (i = 0; i < nops; i++)
16981 {
16982 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16983 &align[i]))
16984 return false;
16985
16986 if (i == 0)
16987 base = cur_base;
16988 else if (REGNO (base) != REGNO (cur_base))
16989 return false;
16990
16991 offsets[i] = INTVAL (cur_offset);
16992 if (GET_CODE (operands[i]) == SUBREG)
16993 {
16994 tmp = SUBREG_REG (operands[i]);
16995 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16996 operands[i] = tmp;
16997 }
16998 }
16999
17000 /* Make sure there is no dependency between the individual loads. */
17001 if (load && REGNO (operands[0]) == REGNO (base))
17002 return false; /* RAW */
17003
17004 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17005 return false; /* WAW */
17006
17007 /* If the same input register is used in both stores
17008 when storing different constants, try to find a free register.
17009 For example, the code
17010 mov r0, 0
17011 str r0, [r2]
17012 mov r0, 1
17013 str r0, [r2, #4]
17014 can be transformed into
17015 mov r1, 0
17016 mov r0, 1
17017 strd r1, r0, [r2]
17018 in Thumb mode assuming that r1 is free.
17019 For ARM mode do the same but only if the starting register
17020 can be made to be even. */
17021 if (const_store
17022 && REGNO (operands[0]) == REGNO (operands[1])
17023 && INTVAL (operands[4]) != INTVAL (operands[5]))
17024 {
17025 if (TARGET_THUMB2)
17026 {
17027 CLEAR_HARD_REG_SET (regset);
17028 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17029 if (tmp == NULL_RTX)
17030 return false;
17031
17032 /* Use the new register in the first load to ensure that
17033 if the original input register is not dead after peephole,
17034 then it will have the correct constant value. */
17035 operands[0] = tmp;
17036 }
17037 else if (TARGET_ARM)
17038 {
17039 int regno = REGNO (operands[0]);
17040 if (!peep2_reg_dead_p (4, operands[0]))
17041 {
17042 /* When the input register is even and is not dead after the
17043 pattern, it has to hold the second constant but we cannot
17044 form a legal STRD in ARM mode with this register as the second
17045 register. */
17046 if (regno % 2 == 0)
17047 return false;
17048
17049 /* Is regno-1 free? */
17050 SET_HARD_REG_SET (regset);
17051 CLEAR_HARD_REG_BIT (regset, regno - 1);
17052 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17053 if (tmp == NULL_RTX)
17054 return false;
17055
17056 operands[0] = tmp;
17057 }
17058 else
17059 {
17060 /* Find a DImode register. */
17061 CLEAR_HARD_REG_SET (regset);
17062 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17063 if (tmp != NULL_RTX)
17064 {
17065 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17066 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17067 }
17068 else
17069 {
17070 /* Can we use the input register to form a DI register? */
17071 SET_HARD_REG_SET (regset);
17072 CLEAR_HARD_REG_BIT (regset,
17073 regno % 2 == 0 ? regno + 1 : regno - 1);
17074 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17075 if (tmp == NULL_RTX)
17076 return false;
17077 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17078 }
17079 }
17080
17081 gcc_assert (operands[0] != NULL_RTX);
17082 gcc_assert (operands[1] != NULL_RTX);
17083 gcc_assert (REGNO (operands[0]) % 2 == 0);
17084 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17085 }
17086 }
17087
17088 /* Make sure the instructions are ordered with lower memory access first. */
17089 if (offsets[0] > offsets[1])
17090 {
17091 gap = offsets[0] - offsets[1];
17092 offset = offsets[1];
17093
17094 /* Swap the instructions such that lower memory is accessed first. */
17095 std::swap (operands[0], operands[1]);
17096 std::swap (operands[2], operands[3]);
17097 std::swap (align[0], align[1]);
17098 if (const_store)
17099 std::swap (operands[4], operands[5]);
17100 }
17101 else
17102 {
17103 gap = offsets[1] - offsets[0];
17104 offset = offsets[0];
17105 }
17106
17107 /* Make sure accesses are to consecutive memory locations. */
17108 if (gap != GET_MODE_SIZE (SImode))
17109 return false;
17110
17111 if (!align_ok_ldrd_strd (align[0], offset))
17112 return false;
17113
17114 /* Make sure we generate legal instructions. */
17115 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17116 false, load))
17117 return true;
17118
17119 /* In Thumb state, where registers are almost unconstrained, there
17120 is little hope of fixing it. */
17121 if (TARGET_THUMB2)
17122 return false;
17123
17124 if (load && commute)
17125 {
17126 /* Try reordering registers. */
17127 std::swap (operands[0], operands[1]);
17128 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17129 false, load))
17130 return true;
17131 }
17132
17133 if (const_store)
17134 {
17135 /* If input registers are dead after this pattern, they can be
17136 reordered or replaced by other registers that are free in the
17137 current pattern. */
17138 if (!peep2_reg_dead_p (4, operands[0])
17139 || !peep2_reg_dead_p (4, operands[1]))
17140 return false;
17141
17142 /* Try to reorder the input registers. */
17143 /* For example, the code
17144 mov r0, 0
17145 mov r1, 1
17146 str r1, [r2]
17147 str r0, [r2, #4]
17148 can be transformed into
17149 mov r1, 0
17150 mov r0, 1
17151 strd r0, r1, [r2]
17152 */
17153 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17154 false, false))
17155 {
17156 std::swap (operands[0], operands[1]);
17157 return true;
17158 }
17159
17160 /* Try to find a free DI register. */
17161 CLEAR_HARD_REG_SET (regset);
17162 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17163 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17164 while (true)
17165 {
17166 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17167 if (tmp == NULL_RTX)
17168 return false;
17169
17170 /* DREG must be an even-numbered register in DImode.
17171 Split it into SI registers. */
17172 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17173 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17174 gcc_assert (operands[0] != NULL_RTX);
17175 gcc_assert (operands[1] != NULL_RTX);
17176 gcc_assert (REGNO (operands[0]) % 2 == 0);
17177 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17178
17179 return (operands_ok_ldrd_strd (operands[0], operands[1],
17180 base, offset,
17181 false, load));
17182 }
17183 }
17184
17185 return false;
17186 }
17187
17188
17189 /* Return true if parallel execution of the two word-size accesses provided
17190 could be satisfied with a single LDRD/STRD instruction. Two word-size
17191 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17192 register operands and OPERANDS[2,3] are the corresponding memory operands.
17193 */
17194 bool
17195 valid_operands_ldrd_strd (rtx *operands, bool load)
17196 {
17197 int nops = 2;
17198 HOST_WIDE_INT offsets[2], offset, align[2];
17199 rtx base = NULL_RTX;
17200 rtx cur_base, cur_offset;
17201 int i, gap;
17202
17203 /* Check that the memory references are immediate offsets from the
17204 same base register. Extract the base register, the destination
17205 registers, and the corresponding memory offsets. */
17206 for (i = 0; i < nops; i++)
17207 {
17208 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17209 &align[i]))
17210 return false;
17211
17212 if (i == 0)
17213 base = cur_base;
17214 else if (REGNO (base) != REGNO (cur_base))
17215 return false;
17216
17217 offsets[i] = INTVAL (cur_offset);
17218 if (GET_CODE (operands[i]) == SUBREG)
17219 return false;
17220 }
17221
17222 if (offsets[0] > offsets[1])
17223 return false;
17224
17225 gap = offsets[1] - offsets[0];
17226 offset = offsets[0];
17227
17228 /* Make sure accesses are to consecutive memory locations. */
17229 if (gap != GET_MODE_SIZE (SImode))
17230 return false;
17231
17232 if (!align_ok_ldrd_strd (align[0], offset))
17233 return false;
17234
17235 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17236 false, load);
17237 }
17238
17239 \f
17240 /* Print a symbolic form of X to the debug file, F. */
17241 static void
17242 arm_print_value (FILE *f, rtx x)
17243 {
17244 switch (GET_CODE (x))
17245 {
17246 case CONST_INT:
17247 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17248 return;
17249
17250 case CONST_DOUBLE:
17251 {
17252 char fpstr[20];
17253 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17254 sizeof (fpstr), 0, 1);
17255 fputs (fpstr, f);
17256 }
17257 return;
17258
17259 case CONST_VECTOR:
17260 {
17261 int i;
17262
17263 fprintf (f, "<");
17264 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17265 {
17266 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17267 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17268 fputc (',', f);
17269 }
17270 fprintf (f, ">");
17271 }
17272 return;
17273
17274 case CONST_STRING:
17275 fprintf (f, "\"%s\"", XSTR (x, 0));
17276 return;
17277
17278 case SYMBOL_REF:
17279 fprintf (f, "`%s'", XSTR (x, 0));
17280 return;
17281
17282 case LABEL_REF:
17283 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17284 return;
17285
17286 case CONST:
17287 arm_print_value (f, XEXP (x, 0));
17288 return;
17289
17290 case PLUS:
17291 arm_print_value (f, XEXP (x, 0));
17292 fprintf (f, "+");
17293 arm_print_value (f, XEXP (x, 1));
17294 return;
17295
17296 case PC:
17297 fprintf (f, "pc");
17298 return;
17299
17300 default:
17301 fprintf (f, "????");
17302 return;
17303 }
17304 }
17305 \f
17306 /* Routines for manipulation of the constant pool. */
17307
17308 /* Arm instructions cannot load a large constant directly into a
17309 register; they have to come from a pc relative load. The constant
17310 must therefore be placed in the addressable range of the pc
17311 relative load. Depending on the precise pc relative load
17312 instruction the range is somewhere between 256 bytes and 4k. This
17313 means that we often have to dump a constant inside a function, and
17314 generate code to branch around it.
17315
17316 It is important to minimize this, since the branches will slow
17317 things down and make the code larger.
17318
17319 Normally we can hide the table after an existing unconditional
17320 branch so that there is no interruption of the flow, but in the
17321 worst case the code looks like this:
17322
17323 ldr rn, L1
17324 ...
17325 b L2
17326 align
17327 L1: .long value
17328 L2:
17329 ...
17330
17331 ldr rn, L3
17332 ...
17333 b L4
17334 align
17335 L3: .long value
17336 L4:
17337 ...
17338
17339 We fix this by performing a scan after scheduling, which notices
17340 which instructions need to have their operands fetched from the
17341 constant table and builds the table.
17342
17343 The algorithm starts by building a table of all the constants that
17344 need fixing up and all the natural barriers in the function (places
17345 where a constant table can be dropped without breaking the flow).
17346 For each fixup we note how far the pc-relative replacement will be
17347 able to reach and the offset of the instruction into the function.
17348
17349 Having built the table we then group the fixes together to form
17350 tables that are as large as possible (subject to addressing
17351 constraints) and emit each table of constants after the last
17352 barrier that is within range of all the instructions in the group.
17353 If a group does not contain a barrier, then we forcibly create one
17354 by inserting a jump instruction into the flow. Once the table has
17355 been inserted, the insns are then modified to reference the
17356 relevant entry in the pool.
17357
17358 Possible enhancements to the algorithm (not implemented) are:
17359
17360 1) For some processors and object formats, there may be benefit in
17361 aligning the pools to the start of cache lines; this alignment
17362 would need to be taken into account when calculating addressability
17363 of a pool. */
17364
17365 /* These typedefs are located at the start of this file, so that
17366 they can be used in the prototypes there. This comment is to
17367 remind readers of that fact so that the following structures
17368 can be understood more easily.
17369
17370 typedef struct minipool_node Mnode;
17371 typedef struct minipool_fixup Mfix; */
17372
17373 struct minipool_node
17374 {
17375 /* Doubly linked chain of entries. */
17376 Mnode * next;
17377 Mnode * prev;
17378 /* The maximum offset into the code that this entry can be placed. While
17379 pushing fixes for forward references, all entries are sorted in order
17380 of increasing max_address. */
17381 HOST_WIDE_INT max_address;
17382 /* Similarly for an entry inserted for a backwards ref. */
17383 HOST_WIDE_INT min_address;
17384 /* The number of fixes referencing this entry. This can become zero
17385 if we "unpush" an entry. In this case we ignore the entry when we
17386 come to emit the code. */
17387 int refcount;
17388 /* The offset from the start of the minipool. */
17389 HOST_WIDE_INT offset;
17390 /* The value in table. */
17391 rtx value;
17392 /* The mode of value. */
17393 machine_mode mode;
17394 /* The size of the value. With iWMMXt enabled
17395 sizes > 4 also imply an alignment of 8 bytes. */
17396 int fix_size;
17397 };
17398
17399 struct minipool_fixup
17400 {
17401 Mfix * next;
17402 rtx_insn * insn;
17403 HOST_WIDE_INT address;
17404 rtx * loc;
17405 machine_mode mode;
17406 int fix_size;
17407 rtx value;
17408 Mnode * minipool;
17409 HOST_WIDE_INT forwards;
17410 HOST_WIDE_INT backwards;
17411 };
17412
17413 /* Fixes less than a word need padding out to a word boundary. */
17414 #define MINIPOOL_FIX_SIZE(mode) \
17415 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17416
17417 static Mnode * minipool_vector_head;
17418 static Mnode * minipool_vector_tail;
17419 static rtx_code_label *minipool_vector_label;
17420 static int minipool_pad;
17421
17422 /* The linked list of all minipool fixes required for this function. */
17423 Mfix * minipool_fix_head;
17424 Mfix * minipool_fix_tail;
17425 /* The fix entry for the current minipool, once it has been placed. */
17426 Mfix * minipool_barrier;
17427
17428 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17429 #define JUMP_TABLES_IN_TEXT_SECTION 0
17430 #endif
17431
17432 static HOST_WIDE_INT
17433 get_jump_table_size (rtx_jump_table_data *insn)
17434 {
17435 /* ADDR_VECs only take room if read-only data goes into the text
17436 section. */
17437 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17438 {
17439 rtx body = PATTERN (insn);
17440 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17441 HOST_WIDE_INT size;
17442 HOST_WIDE_INT modesize;
17443
17444 modesize = GET_MODE_SIZE (GET_MODE (body));
17445 size = modesize * XVECLEN (body, elt);
17446 switch (modesize)
17447 {
17448 case 1:
17449 /* Round up size of TBB table to a halfword boundary. */
17450 size = (size + 1) & ~HOST_WIDE_INT_1;
17451 break;
17452 case 2:
17453 /* No padding necessary for TBH. */
17454 break;
17455 case 4:
17456 /* Add two bytes for alignment on Thumb. */
17457 if (TARGET_THUMB)
17458 size += 2;
17459 break;
17460 default:
17461 gcc_unreachable ();
17462 }
17463 return size;
17464 }
17465
17466 return 0;
17467 }
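
/* A standalone sketch (not compiler code; the helper name is purely
   illustrative) of the size computation above: a 7-entry TBB table
   takes 8 bytes, a 7-entry TBH table 14 bytes, and a 7-entry word
   table on Thumb 30 bytes.

     static long
     jump_table_bytes (int entry_size, int n_entries, int thumb)
     {
       long size = (long) entry_size * n_entries;
       if (entry_size == 1)
         size = (size + 1) & ~1L;       // round TBB tables to a halfword
       else if (entry_size == 4 && thumb)
         size += 2;                     // alignment padding on Thumb
       return size;
     }
*/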
17468
17469 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17470 function descriptor) into a register and the GOT address into the
17471 FDPIC register, returning an rtx for the register holding the
17472 function address. */
17473
17474 rtx
17475 arm_load_function_descriptor (rtx funcdesc)
17476 {
17477 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17478 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17479 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17480 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17481
17482 emit_move_insn (fnaddr_reg, fnaddr);
17483
17484 /* The ABI requires the entry point address to be loaded first, but
17485 since we cannot support lazy binding for lack of atomic load of
17486 two 32-bit values, we do not need to bother to prevent the
17487 previous load from being moved after that of the GOT address. */
17488 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17489
17490 return fnaddr_reg;
17491 }
17492
17493 /* Return the maximum amount of padding that will be inserted before
17494 label LABEL. */
17495 static HOST_WIDE_INT
17496 get_label_padding (rtx label)
17497 {
17498 HOST_WIDE_INT align, min_insn_size;
17499
17500 align = 1 << label_to_alignment (label).levels[0].log;
17501 min_insn_size = TARGET_THUMB ? 2 : 4;
17502 return align > min_insn_size ? align - min_insn_size : 0;
17503 }
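
/* For example, a label aligned to an 8-byte boundary on Thumb (minimum
   instruction size 2) can be preceded by at most 8 - 2 = 6 bytes of
   padding, while alignments no larger than one instruction add none.  */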
17504
17505 /* Move a minipool fix MP from its current location to before MAX_MP.
17506 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17507 constraints may need updating. */
17508 static Mnode *
17509 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17510 HOST_WIDE_INT max_address)
17511 {
17512 /* The code below assumes these are different. */
17513 gcc_assert (mp != max_mp);
17514
17515 if (max_mp == NULL)
17516 {
17517 if (max_address < mp->max_address)
17518 mp->max_address = max_address;
17519 }
17520 else
17521 {
17522 if (max_address > max_mp->max_address - mp->fix_size)
17523 mp->max_address = max_mp->max_address - mp->fix_size;
17524 else
17525 mp->max_address = max_address;
17526
17527 /* Unlink MP from its current position. Since max_mp is non-null,
17528 mp->prev must be non-null. */
17529 mp->prev->next = mp->next;
17530 if (mp->next != NULL)
17531 mp->next->prev = mp->prev;
17532 else
17533 minipool_vector_tail = mp->prev;
17534
17535 /* Re-insert it before MAX_MP. */
17536 mp->next = max_mp;
17537 mp->prev = max_mp->prev;
17538 max_mp->prev = mp;
17539
17540 if (mp->prev != NULL)
17541 mp->prev->next = mp;
17542 else
17543 minipool_vector_head = mp;
17544 }
17545
17546 /* Save the new entry. */
17547 max_mp = mp;
17548
17549 /* Scan over the preceding entries and adjust their addresses as
17550 required. */
17551 while (mp->prev != NULL
17552 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17553 {
17554 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17555 mp = mp->prev;
17556 }
17557
17558 return max_mp;
17559 }
17560
17561 /* Add a constant to the minipool for a forward reference. Returns the
17562 node added or NULL if the constant will not fit in this pool. */
17563 static Mnode *
17564 add_minipool_forward_ref (Mfix *fix)
17565 {
17566 /* If set, max_mp is the first pool_entry that has a lower
17567 constraint than the one we are trying to add. */
17568 Mnode * max_mp = NULL;
17569 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17570 Mnode * mp;
17571
17572 /* If the minipool starts before the end of FIX->INSN then this FIX
17573 cannot be placed into the current pool. Furthermore, adding the
17574 new constant pool entry may cause the pool to start FIX_SIZE bytes
17575 earlier. */
17576 if (minipool_vector_head &&
17577 (fix->address + get_attr_length (fix->insn)
17578 >= minipool_vector_head->max_address - fix->fix_size))
17579 return NULL;
17580
17581 /* Scan the pool to see if a constant with the same value has
17582 already been added. While we are doing this, also note the
17583 location where we must insert the constant if it doesn't already
17584 exist. */
17585 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17586 {
17587 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17588 && fix->mode == mp->mode
17589 && (!LABEL_P (fix->value)
17590 || (CODE_LABEL_NUMBER (fix->value)
17591 == CODE_LABEL_NUMBER (mp->value)))
17592 && rtx_equal_p (fix->value, mp->value))
17593 {
17594 /* More than one fix references this entry. */
17595 mp->refcount++;
17596 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17597 }
17598
17599 /* Note the insertion point if necessary. */
17600 if (max_mp == NULL
17601 && mp->max_address > max_address)
17602 max_mp = mp;
17603
17604 /* If we are inserting an 8-byte aligned quantity and
17605 we have not already found an insertion point, then
17606 make sure that all such 8-byte aligned quantities are
17607 placed at the start of the pool. */
17608 if (ARM_DOUBLEWORD_ALIGN
17609 && max_mp == NULL
17610 && fix->fix_size >= 8
17611 && mp->fix_size < 8)
17612 {
17613 max_mp = mp;
17614 max_address = mp->max_address;
17615 }
17616 }
17617
17618 /* The value is not currently in the minipool, so we need to create
17619 a new entry for it. If MAX_MP is NULL, the entry will be put on
17620 the end of the list since the placement is less constrained than
17621 any existing entry. Otherwise, we insert the new fix before
17622 MAX_MP and, if necessary, adjust the constraints on the other
17623 entries. */
17624 mp = XNEW (Mnode);
17625 mp->fix_size = fix->fix_size;
17626 mp->mode = fix->mode;
17627 mp->value = fix->value;
17628 mp->refcount = 1;
17629 /* Not yet required for a backwards ref. */
17630 mp->min_address = -65536;
17631
17632 if (max_mp == NULL)
17633 {
17634 mp->max_address = max_address;
17635 mp->next = NULL;
17636 mp->prev = minipool_vector_tail;
17637
17638 if (mp->prev == NULL)
17639 {
17640 minipool_vector_head = mp;
17641 minipool_vector_label = gen_label_rtx ();
17642 }
17643 else
17644 mp->prev->next = mp;
17645
17646 minipool_vector_tail = mp;
17647 }
17648 else
17649 {
17650 if (max_address > max_mp->max_address - mp->fix_size)
17651 mp->max_address = max_mp->max_address - mp->fix_size;
17652 else
17653 mp->max_address = max_address;
17654
17655 mp->next = max_mp;
17656 mp->prev = max_mp->prev;
17657 max_mp->prev = mp;
17658 if (mp->prev != NULL)
17659 mp->prev->next = mp;
17660 else
17661 minipool_vector_head = mp;
17662 }
17663
17664 /* Save the new entry. */
17665 max_mp = mp;
17666
17667 /* Scan over the preceding entries and adjust their addresses as
17668 required. */
17669 while (mp->prev != NULL
17670 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17671 {
17672 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17673 mp = mp->prev;
17674 }
17675
17676 return max_mp;
17677 }
17678
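/* MP is an existing minipool entry that is now also referenced by a backward
   fix whose minimum-address constraint is MIN_ADDRESS.  If MIN_MP is NULL,
   only tighten MP's own constraint; otherwise unlink MP and reinsert it
   immediately after MIN_MP.  In both cases recompute the offset of every
   entry and propagate the min_address constraints towards the tail of the
   pool.  Return MP as the new MIN_MP.  */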
17679 static Mnode *
17680 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17681 HOST_WIDE_INT min_address)
17682 {
17683 HOST_WIDE_INT offset;
17684
17685 /* The code below assumes these are different. */
17686 gcc_assert (mp != min_mp);
17687
17688 if (min_mp == NULL)
17689 {
17690 if (min_address > mp->min_address)
17691 mp->min_address = min_address;
17692 }
17693 else
17694 {
17695 /* We will adjust this below if it is too loose. */
17696 mp->min_address = min_address;
17697
17698 /* Unlink MP from its current position. Since min_mp is non-null,
17699 mp->next must be non-null. */
17700 mp->next->prev = mp->prev;
17701 if (mp->prev != NULL)
17702 mp->prev->next = mp->next;
17703 else
17704 minipool_vector_head = mp->next;
17705
17706 /* Reinsert it after MIN_MP. */
17707 mp->prev = min_mp;
17708 mp->next = min_mp->next;
17709 min_mp->next = mp;
17710 if (mp->next != NULL)
17711 mp->next->prev = mp;
17712 else
17713 minipool_vector_tail = mp;
17714 }
17715
17716 min_mp = mp;
17717
17718 offset = 0;
17719 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17720 {
17721 mp->offset = offset;
17722 if (mp->refcount > 0)
17723 offset += mp->fix_size;
17724
17725 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17726 mp->next->min_address = mp->min_address + mp->fix_size;
17727 }
17728
17729 return min_mp;
17730 }
17731
17732 /* Add a constant to the minipool for a backward reference. Returns the
17733 node added or NULL if the constant will not fit in this pool.
17734
17735 Note that the code for inserting a backwards reference can be
17736 somewhat confusing because the calculated offsets for each fix do
17737 not take into account the size of the pool (which is still under
17738 construction). */
17739 static Mnode *
17740 add_minipool_backward_ref (Mfix *fix)
17741 {
17742 /* If set, min_mp is the last pool_entry that has a lower constraint
17743 than the one we are trying to add. */
17744 Mnode *min_mp = NULL;
17745 /* This can be negative, since it is only a constraint. */
17746 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17747 Mnode *mp;
17748
17749 /* If we can't reach the current pool from this insn, or if we can't
17750 insert this entry at the end of the pool without pushing other
17751 fixes out of range, then we don't try. This ensures that we
17752 can't fail later on. */
17753 if (min_address >= minipool_barrier->address
17754 || (minipool_vector_tail->min_address + fix->fix_size
17755 >= minipool_barrier->address))
17756 return NULL;
17757
17758 /* Scan the pool to see if a constant with the same value has
17759 already been added. While we are doing this, also note the
17760 location where we must insert the constant if it doesn't already
17761 exist. */
17762 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17763 {
17764 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17765 && fix->mode == mp->mode
17766 && (!LABEL_P (fix->value)
17767 || (CODE_LABEL_NUMBER (fix->value)
17768 == CODE_LABEL_NUMBER (mp->value)))
17769 && rtx_equal_p (fix->value, mp->value)
17770 /* Check that there is enough slack to move this entry to the
17771 end of the table (this is conservative). */
17772 && (mp->max_address
17773 > (minipool_barrier->address
17774 + minipool_vector_tail->offset
17775 + minipool_vector_tail->fix_size)))
17776 {
17777 mp->refcount++;
17778 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17779 }
17780
17781 if (min_mp != NULL)
17782 mp->min_address += fix->fix_size;
17783 else
17784 {
17785 /* Note the insertion point if necessary. */
17786 if (mp->min_address < min_address)
17787 {
17788 /* For now, we do not allow the insertion of 8-byte alignment
17789 requiring nodes anywhere but at the start of the pool. */
17790 if (ARM_DOUBLEWORD_ALIGN
17791 && fix->fix_size >= 8 && mp->fix_size < 8)
17792 return NULL;
17793 else
17794 min_mp = mp;
17795 }
17796 else if (mp->max_address
17797 < minipool_barrier->address + mp->offset + fix->fix_size)
17798 {
17799 /* Inserting before this entry would push the fix beyond
17800 its maximum address (which can happen if we have
17801 re-located a forwards fix); force the new fix to come
17802 after it. */
17803 if (ARM_DOUBLEWORD_ALIGN
17804 && fix->fix_size >= 8 && mp->fix_size < 8)
17805 return NULL;
17806 else
17807 {
17808 min_mp = mp;
17809 min_address = mp->min_address + fix->fix_size;
17810 }
17811 }
17812 /* Do not insert a non-8-byte aligned quantity before 8-byte
17813 aligned quantities. */
17814 else if (ARM_DOUBLEWORD_ALIGN
17815 && fix->fix_size < 8
17816 && mp->fix_size >= 8)
17817 {
17818 min_mp = mp;
17819 min_address = mp->min_address + fix->fix_size;
17820 }
17821 }
17822 }
17823
17824 /* We need to create a new entry. */
17825 mp = XNEW (Mnode);
17826 mp->fix_size = fix->fix_size;
17827 mp->mode = fix->mode;
17828 mp->value = fix->value;
17829 mp->refcount = 1;
17830 mp->max_address = minipool_barrier->address + 65536;
17831
17832 mp->min_address = min_address;
17833
17834 if (min_mp == NULL)
17835 {
17836 mp->prev = NULL;
17837 mp->next = minipool_vector_head;
17838
17839 if (mp->next == NULL)
17840 {
17841 minipool_vector_tail = mp;
17842 minipool_vector_label = gen_label_rtx ();
17843 }
17844 else
17845 mp->next->prev = mp;
17846
17847 minipool_vector_head = mp;
17848 }
17849 else
17850 {
17851 mp->next = min_mp->next;
17852 mp->prev = min_mp;
17853 min_mp->next = mp;
17854
17855 if (mp->next != NULL)
17856 mp->next->prev = mp;
17857 else
17858 minipool_vector_tail = mp;
17859 }
17860
17861 /* Save the new entry. */
17862 min_mp = mp;
17863
17864 if (mp->prev)
17865 mp = mp->prev;
17866 else
17867 mp->offset = 0;
17868
17869 /* Scan over the following entries and adjust their offsets. */
17870 while (mp->next != NULL)
17871 {
17872 if (mp->next->min_address < mp->min_address + mp->fix_size)
17873 mp->next->min_address = mp->min_address + mp->fix_size;
17874
17875 if (mp->refcount)
17876 mp->next->offset = mp->offset + mp->fix_size;
17877 else
17878 mp->next->offset = mp->offset;
17879
17880 mp = mp->next;
17881 }
17882
17883 return min_mp;
17884 }
17885
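/* Record BARRIER as the barrier terminating the current minipool and assign
   a byte offset within the pool to every entry that is still referenced
   (refcount > 0); unreferenced entries take no space.  */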
17886 static void
17887 assign_minipool_offsets (Mfix *barrier)
17888 {
17889 HOST_WIDE_INT offset = 0;
17890 Mnode *mp;
17891
17892 minipool_barrier = barrier;
17893
17894 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17895 {
17896 mp->offset = offset;
17897
17898 if (mp->refcount > 0)
17899 offset += mp->fix_size;
17900 }
17901 }
17902
17903 /* Output the literal table. */
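/* The pool is emitted after SCAN as: a fresh label, an alignment insn
   (8-byte if any live entry needs it, otherwise 4-byte), the pool label
   itself, one consttable_<size> insn per entry still referenced, a
   consttable_end marker and finally a barrier.  */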
17904 static void
17905 dump_minipool (rtx_insn *scan)
17906 {
17907 Mnode * mp;
17908 Mnode * nmp;
17909 int align64 = 0;
17910
17911 if (ARM_DOUBLEWORD_ALIGN)
17912 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17913 if (mp->refcount > 0 && mp->fix_size >= 8)
17914 {
17915 align64 = 1;
17916 break;
17917 }
17918
17919 if (dump_file)
17920 fprintf (dump_file,
17921 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17922 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17923
17924 scan = emit_label_after (gen_label_rtx (), scan);
17925 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17926 scan = emit_label_after (minipool_vector_label, scan);
17927
17928 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17929 {
17930 if (mp->refcount > 0)
17931 {
17932 if (dump_file)
17933 {
17934 fprintf (dump_file,
17935 ";; Offset %u, min %ld, max %ld ",
17936 (unsigned) mp->offset, (unsigned long) mp->min_address,
17937 (unsigned long) mp->max_address);
17938 arm_print_value (dump_file, mp->value);
17939 fputc ('\n', dump_file);
17940 }
17941
17942 rtx val = copy_rtx (mp->value);
17943
17944 switch (GET_MODE_SIZE (mp->mode))
17945 {
17946 #ifdef HAVE_consttable_1
17947 case 1:
17948 scan = emit_insn_after (gen_consttable_1 (val), scan);
17949 break;
17950
17951 #endif
17952 #ifdef HAVE_consttable_2
17953 case 2:
17954 scan = emit_insn_after (gen_consttable_2 (val), scan);
17955 break;
17956
17957 #endif
17958 #ifdef HAVE_consttable_4
17959 case 4:
17960 scan = emit_insn_after (gen_consttable_4 (val), scan);
17961 break;
17962
17963 #endif
17964 #ifdef HAVE_consttable_8
17965 case 8:
17966 scan = emit_insn_after (gen_consttable_8 (val), scan);
17967 break;
17968
17969 #endif
17970 #ifdef HAVE_consttable_16
17971 case 16:
17972 scan = emit_insn_after (gen_consttable_16 (val), scan);
17973 break;
17974
17975 #endif
17976 default:
17977 gcc_unreachable ();
17978 }
17979 }
17980
17981 nmp = mp->next;
17982 free (mp);
17983 }
17984
17985 minipool_vector_head = minipool_vector_tail = NULL;
17986 scan = emit_insn_after (gen_consttable_end (), scan);
17987 scan = emit_barrier_after (scan);
17988 }
17989
17990 /* Return the cost of forcibly inserting a barrier after INSN. */
17991 static int
17992 arm_barrier_cost (rtx_insn *insn)
17993 {
17994 /* Basing the location of the pool on the loop depth is preferable,
17995 but at the moment, the basic block information seems to be
17996 corrupt by this stage of the compilation. */
17997 int base_cost = 50;
17998 rtx_insn *next = next_nonnote_insn (insn);
17999
18000 if (next != NULL && LABEL_P (next))
18001 base_cost -= 20;
18002
18003 switch (GET_CODE (insn))
18004 {
18005 case CODE_LABEL:
18006 /* It will always be better to place the table before the label, rather
18007 than after it. */
18008 return 50;
18009
18010 case INSN:
18011 case CALL_INSN:
18012 return base_cost;
18013
18014 case JUMP_INSN:
18015 return base_cost - 10;
18016
18017 default:
18018 return base_cost + 10;
18019 }
18020 }
18021
18022 /* Find the best place in the insn stream in the range
18023 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18024 Create the barrier by inserting a jump and add a new fix entry for
18025 it. */
18026 static Mfix *
18027 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18028 {
18029 HOST_WIDE_INT count = 0;
18030 rtx_barrier *barrier;
18031 rtx_insn *from = fix->insn;
18032 /* The instruction after which we will insert the jump. */
18033 rtx_insn *selected = NULL;
18034 int selected_cost;
18035 /* The address at which the jump instruction will be placed. */
18036 HOST_WIDE_INT selected_address;
18037 Mfix * new_fix;
18038 HOST_WIDE_INT max_count = max_address - fix->address;
18039 rtx_code_label *label = gen_label_rtx ();
18040
18041 selected_cost = arm_barrier_cost (from);
18042 selected_address = fix->address;
18043
18044 while (from && count < max_count)
18045 {
18046 rtx_jump_table_data *tmp;
18047 int new_cost;
18048
18049 /* This code shouldn't have been called if there was a natural barrier
18050 within range. */
18051 gcc_assert (!BARRIER_P (from));
18052
18053 /* Count the length of this insn. This must stay in sync with the
18054 code that pushes minipool fixes. */
18055 if (LABEL_P (from))
18056 count += get_label_padding (from);
18057 else
18058 count += get_attr_length (from);
18059
18060 /* If there is a jump table, add its length. */
18061 if (tablejump_p (from, NULL, &tmp))
18062 {
18063 count += get_jump_table_size (tmp);
18064
18065 /* Jump tables aren't in a basic block, so base the cost on
18066 the dispatch insn. If we select this location, we will
18067 still put the pool after the table. */
18068 new_cost = arm_barrier_cost (from);
18069
18070 if (count < max_count
18071 && (!selected || new_cost <= selected_cost))
18072 {
18073 selected = tmp;
18074 selected_cost = new_cost;
18075 selected_address = fix->address + count;
18076 }
18077
18078 /* Continue after the dispatch table. */
18079 from = NEXT_INSN (tmp);
18080 continue;
18081 }
18082
18083 new_cost = arm_barrier_cost (from);
18084
18085 if (count < max_count
18086 && (!selected || new_cost <= selected_cost))
18087 {
18088 selected = from;
18089 selected_cost = new_cost;
18090 selected_address = fix->address + count;
18091 }
18092
18093 from = NEXT_INSN (from);
18094 }
18095
18096 /* Make sure that we found a place to insert the jump. */
18097 gcc_assert (selected);
18098
18099 /* Create a new JUMP_INSN that branches around a barrier. */
18100 from = emit_jump_insn_after (gen_jump (label), selected);
18101 JUMP_LABEL (from) = label;
18102 barrier = emit_barrier_after (from);
18103 emit_label_after (label, barrier);
18104
18105 /* Create a minipool barrier entry for the new barrier. */
18106 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18107 new_fix->insn = barrier;
18108 new_fix->address = selected_address;
18109 new_fix->next = fix->next;
18110 fix->next = new_fix;
18111
18112 return new_fix;
18113 }
18114
18115 /* Record that there is a natural barrier in the insn stream at
18116 ADDRESS. */
18117 static void
18118 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18119 {
18120 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18121
18122 fix->insn = insn;
18123 fix->address = address;
18124
18125 fix->next = NULL;
18126 if (minipool_fix_head != NULL)
18127 minipool_fix_tail->next = fix;
18128 else
18129 minipool_fix_head = fix;
18130
18131 minipool_fix_tail = fix;
18132 }
18133
18134 /* Record INSN, which will need fixing up to load a value from the
18135 minipool. ADDRESS is the offset of the insn since the start of the
18136 function; LOC is a pointer to the part of the insn which requires
18137 fixing; VALUE is the constant that must be loaded, which is of type
18138 MODE. */
18139 static void
18140 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18141 machine_mode mode, rtx value)
18142 {
18143 gcc_assert (!arm_disable_literal_pool);
18144 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18145
18146 fix->insn = insn;
18147 fix->address = address;
18148 fix->loc = loc;
18149 fix->mode = mode;
18150 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18151 fix->value = value;
18152 fix->forwards = get_attr_pool_range (insn);
18153 fix->backwards = get_attr_neg_pool_range (insn);
18154 fix->minipool = NULL;
18155
18156 /* If an insn doesn't have a range defined for it, then it isn't
18157 expecting to be reworked by this code. Better to stop now than
18158 to generate duff assembly code. */
18159 gcc_assert (fix->forwards || fix->backwards);
18160
18161 /* If an entry requires 8-byte alignment then assume all constant pools
18162 require 4 bytes of padding. Trying to do this later on a per-pool
18163 basis is awkward because existing pool entries have to be modified. */
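/* Rough illustration: once the pool contains a DImode entry, the pool start
   may need up to 4 bytes of padding to become 8-byte aligned, so the
   forward-range computation (see the use of minipool_pad in
   add_minipool_forward_ref) conservatively assumes that padding is
   present.  */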
18164 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18165 minipool_pad = 4;
18166
18167 if (dump_file)
18168 {
18169 fprintf (dump_file,
18170 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18171 GET_MODE_NAME (mode),
18172 INSN_UID (insn), (unsigned long) address,
18173 -1 * (long)fix->backwards, (long)fix->forwards);
18174 arm_print_value (dump_file, fix->value);
18175 fprintf (dump_file, "\n");
18176 }
18177
18178 /* Add it to the chain of fixes. */
18179 fix->next = NULL;
18180
18181 if (minipool_fix_head != NULL)
18182 minipool_fix_tail->next = fix;
18183 else
18184 minipool_fix_head = fix;
18185
18186 minipool_fix_tail = fix;
18187 }
18188
18189 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
18190 in terms of the number of insns needed; constants that would cost more
18191 than this are instead loaded from the constant pool. */
18192 int
18193 arm_max_const_double_inline_cost ()
18194 {
18195 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18196 }
18197
18198 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18199 Returns the number of insns needed, or 99 if we don't know how to
18200 do it. */
18201 int
18202 arm_const_double_inline_cost (rtx val)
18203 {
18204 rtx lowpart, highpart;
18205 machine_mode mode;
18206
18207 mode = GET_MODE (val);
18208
18209 if (mode == VOIDmode)
18210 mode = DImode;
18211
18212 gcc_assert (GET_MODE_SIZE (mode) == 8);
18213
18214 lowpart = gen_lowpart (SImode, val);
18215 highpart = gen_highpart_mode (SImode, mode, val);
18216
18217 gcc_assert (CONST_INT_P (lowpart));
18218 gcc_assert (CONST_INT_P (highpart));
18219
18220 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18221 NULL_RTX, NULL_RTX, 0, 0)
18222 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18223 NULL_RTX, NULL_RTX, 0, 0));
18224 }
18225
18226 /* Cost of loading a SImode constant. */
18227 static inline int
18228 arm_const_inline_cost (enum rtx_code code, rtx val)
18229 {
18230 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18231 NULL_RTX, NULL_RTX, 1, 0);
18232 }
18233
18234 /* Return true if it is worthwhile to split a 64-bit constant into two
18235 32-bit operations. This is the case if optimizing for size, or
18236 if we have load delay slots, or if one 32-bit part can be done with
18237 a single data operation. */
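/* For instance (illustration only), 0x000000ff00000001 splits into the
   halves 0xff and 0x1, both valid ARM data-processing immediates, so
   building it with two moves is preferable to a literal-pool load.  */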
18238 bool
18239 arm_const_double_by_parts (rtx val)
18240 {
18241 machine_mode mode = GET_MODE (val);
18242 rtx part;
18243
18244 if (optimize_size || arm_ld_sched)
18245 return true;
18246
18247 if (mode == VOIDmode)
18248 mode = DImode;
18249
18250 part = gen_highpart_mode (SImode, mode, val);
18251
18252 gcc_assert (CONST_INT_P (part));
18253
18254 if (const_ok_for_arm (INTVAL (part))
18255 || const_ok_for_arm (~INTVAL (part)))
18256 return true;
18257
18258 part = gen_lowpart (SImode, val);
18259
18260 gcc_assert (CONST_INT_P (part));
18261
18262 if (const_ok_for_arm (INTVAL (part))
18263 || const_ok_for_arm (~INTVAL (part)))
18264 return true;
18265
18266 return false;
18267 }
18268
18269 /* Return true if it is possible to inline both the high and low parts
18270 of a 64-bit constant into 32-bit data processing instructions. */
18271 bool
18272 arm_const_double_by_immediates (rtx val)
18273 {
18274 machine_mode mode = GET_MODE (val);
18275 rtx part;
18276
18277 if (mode == VOIDmode)
18278 mode = DImode;
18279
18280 part = gen_highpart_mode (SImode, mode, val);
18281
18282 gcc_assert (CONST_INT_P (part));
18283
18284 if (!const_ok_for_arm (INTVAL (part)))
18285 return false;
18286
18287 part = gen_lowpart (SImode, val);
18288
18289 gcc_assert (CONST_INT_P (part));
18290
18291 if (!const_ok_for_arm (INTVAL (part)))
18292 return false;
18293
18294 return true;
18295 }
18296
18297 /* Scan INSN and note any of its operands that need fixing.
18298 If DO_PUSHES is false we do not actually push any of the fixups
18299 needed. */
18300 static void
18301 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18302 {
18303 int opno;
18304
18305 extract_constrain_insn (insn);
18306
18307 if (recog_data.n_alternatives == 0)
18308 return;
18309
18310 /* Fill in recog_op_alt with information about the constraints of
18311 this insn. */
18312 preprocess_constraints (insn);
18313
18314 const operand_alternative *op_alt = which_op_alt ();
18315 for (opno = 0; opno < recog_data.n_operands; opno++)
18316 {
18317 /* Things we need to fix can only occur in inputs. */
18318 if (recog_data.operand_type[opno] != OP_IN)
18319 continue;
18320
18321 /* If this alternative is a memory reference, then any mention
18322 of constants in this alternative is really to fool reload
18323 into allowing us to accept one there. We need to fix them up
18324 now so that we output the right code. */
18325 if (op_alt[opno].memory_ok)
18326 {
18327 rtx op = recog_data.operand[opno];
18328
18329 if (CONSTANT_P (op))
18330 {
18331 if (do_pushes)
18332 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18333 recog_data.operand_mode[opno], op);
18334 }
18335 else if (MEM_P (op)
18336 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18337 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18338 {
18339 if (do_pushes)
18340 {
18341 rtx cop = avoid_constant_pool_reference (op);
18342
18343 /* Casting the address of something to a mode narrower
18344 than a word can cause avoid_constant_pool_reference()
18345 to return the pool reference itself. That's no good to
18346 us here. Let's just hope that we can use the
18347 constant pool value directly. */
18348 if (op == cop)
18349 cop = get_pool_constant (XEXP (op, 0));
18350
18351 push_minipool_fix (insn, address,
18352 recog_data.operand_loc[opno],
18353 recog_data.operand_mode[opno], cop);
18354 }
18355
18356 }
18357 }
18358 }
18359
18360 return;
18361 }
18362
18363 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18364 and unions in the context of ARMv8-M Security Extensions. It is used as a
18365 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18366 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18367 or four masks, depending on whether it is being computed for a
18368 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18369 respectively. The tree for the type of the argument or a field within an
18370 argument is passed in ARG_TYPE, the current register this argument or field
18371 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18372 argument or field starts at is passed in STARTING_BIT and the last used bit
18373 is kept in LAST_USED_BIT which is also updated accordingly. */
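/* Illustration (assuming default alignment and an argument starting in r0):
   for struct { char c; short s; } the char occupies bits 0-7 and the short
   is aligned to bits 16-31, so the padding bits 8-15 are recorded in
   padding_bits_to_clear[0] (mask 0xff00) and r0 is marked as in use.  */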
18374
18375 static unsigned HOST_WIDE_INT
18376 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18377 uint32_t * padding_bits_to_clear,
18378 unsigned starting_bit, int * last_used_bit)
18379
18380 {
18381 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18382
18383 if (TREE_CODE (arg_type) == RECORD_TYPE)
18384 {
18385 unsigned current_bit = starting_bit;
18386 tree field;
18387 long int offset, size;
18388
18389
18390 field = TYPE_FIELDS (arg_type);
18391 while (field)
18392 {
18393 /* The offset within a structure is always an offset from
18394 the start of that structure. Make sure we take that into account
18395 in the calculation of the register-based offset that we use here. */
18396 offset = starting_bit;
18397 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18398 offset %= 32;
18399
18400 /* This is the actual size of the field, for bitfields this is the
18401 bitfield width and not the container size. */
18402 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18403
18404 if (*last_used_bit != offset)
18405 {
18406 if (offset < *last_used_bit)
18407 {
18408 /* This field's offset is before the 'last_used_bit', which
18409 means this field goes in the next register. So we need to
18410 pad the rest of the current register and increase the
18411 register number. */
18412 uint32_t mask;
18413 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18414 mask++;
18415
18416 padding_bits_to_clear[*regno] |= mask;
18417 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18418 (*regno)++;
18419 }
18420 else
18421 {
18422 /* Otherwise we pad the bits between the last field's end and
18423 the start of the new field. */
18424 uint32_t mask;
18425
18426 mask = ((uint32_t)-1) >> (32 - offset);
18427 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18428 padding_bits_to_clear[*regno] |= mask;
18429 }
18430 current_bit = offset;
18431 }
18432
18433 /* Calculate further padding bits for inner structs/unions too. */
18434 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18435 {
18436 *last_used_bit = current_bit;
18437 not_to_clear_reg_mask
18438 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18439 padding_bits_to_clear, offset,
18440 last_used_bit);
18441 }
18442 else
18443 {
18444 /* Update 'current_bit' with this field's size. If the
18445 'current_bit' lies in a subsequent register, update 'regno' and
18446 reset 'current_bit' to point to the current bit in that new
18447 register. */
18448 current_bit += size;
18449 while (current_bit >= 32)
18450 {
18451 current_bit-=32;
18452 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18453 (*regno)++;
18454 }
18455 *last_used_bit = current_bit;
18456 }
18457
18458 field = TREE_CHAIN (field);
18459 }
18460 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18461 }
18462 else if (TREE_CODE (arg_type) == UNION_TYPE)
18463 {
18464 tree field, field_t;
18465 int i, regno_t, field_size;
18466 int max_reg = -1;
18467 int max_bit = -1;
18468 uint32_t mask;
18469 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18470 = {-1, -1, -1, -1};
18471
18472 /* To compute the padding bits in a union we only consider bits as
18473 padding bits if they are always either a padding bit or fall outside a
18474 field's size for all fields in the union. */
18475 field = TYPE_FIELDS (arg_type);
18476 while (field)
18477 {
18478 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18479 = {0U, 0U, 0U, 0U};
18480 int last_used_bit_t = *last_used_bit;
18481 regno_t = *regno;
18482 field_t = TREE_TYPE (field);
18483
18484 /* If the field's type is either a record or a union make sure to
18485 compute their padding bits too. */
18486 if (RECORD_OR_UNION_TYPE_P (field_t))
18487 not_to_clear_reg_mask
18488 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18489 &padding_bits_to_clear_t[0],
18490 starting_bit, &last_used_bit_t);
18491 else
18492 {
18493 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18494 regno_t = (field_size / 32) + *regno;
18495 last_used_bit_t = (starting_bit + field_size) % 32;
18496 }
18497
18498 for (i = *regno; i < regno_t; i++)
18499 {
18500 /* For all but the last register used by this field only keep the
18501 padding bits that were padding bits in this field. */
18502 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18503 }
18504
18505 /* For the last register, keep all padding bits that were padding
18506 bits in this field and any padding bits that are still valid
18507 as padding bits but fall outside of this field's size. */
18508 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18509 padding_bits_to_clear_res[regno_t]
18510 &= padding_bits_to_clear_t[regno_t] | mask;
18511
18512 /* Update the maximum size of the fields in terms of registers used
18513 ('max_reg') and the 'last_used_bit' in said register. */
18514 if (max_reg < regno_t)
18515 {
18516 max_reg = regno_t;
18517 max_bit = last_used_bit_t;
18518 }
18519 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18520 max_bit = last_used_bit_t;
18521
18522 field = TREE_CHAIN (field);
18523 }
18524
18525 /* Update the current padding_bits_to_clear using the intersection of the
18526 padding bits of all the fields. */
18527 for (i=*regno; i < max_reg; i++)
18528 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18529
18530 /* Do not keep trailing padding bits; we do not know yet whether this
18531 is the end of the argument. */
18532 mask = ((uint32_t) 1 << max_bit) - 1;
18533 padding_bits_to_clear[max_reg]
18534 |= padding_bits_to_clear_res[max_reg] & mask;
18535
18536 *regno = max_reg;
18537 *last_used_bit = max_bit;
18538 }
18539 else
18540 /* This function should only be used for structs and unions. */
18541 gcc_unreachable ();
18542
18543 return not_to_clear_reg_mask;
18544 }
18545
18546 /* In the context of ARMv8-M Security Extensions, this function is used for both
18547 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18548 registers are used when returning or passing arguments, which is then
18549 returned as a mask. It will also compute a mask to indicate padding/unused
18550 bits for each of these registers, and passes this through the
18551 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18552 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18553 the starting register used to pass this argument or return value is passed
18554 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18555 for struct and union types. */
18556
18557 static unsigned HOST_WIDE_INT
18558 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18559 uint32_t * padding_bits_to_clear)
18560
18561 {
18562 int last_used_bit = 0;
18563 unsigned HOST_WIDE_INT not_to_clear_mask;
18564
18565 if (RECORD_OR_UNION_TYPE_P (arg_type))
18566 {
18567 not_to_clear_mask
18568 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18569 padding_bits_to_clear, 0,
18570 &last_used_bit);
18571
18572
18573 /* If the 'last_used_bit' is not zero, that means we are still using a
18574 part of the last 'regno'. In such cases we must clear the trailing
18575 bits. Otherwise we are not using regno and we should mark it as to
18576 clear. */
18577 if (last_used_bit != 0)
18578 padding_bits_to_clear[regno]
18579 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18580 else
18581 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18582 }
18583 else
18584 {
18585 not_to_clear_mask = 0;
18586 /* We are not dealing with structs or unions, so these arguments may be
18587 passed in floating-point registers too. In some cases a BLKmode is
18588 used when returning or passing arguments in multiple VFP registers. */
18589 if (GET_MODE (arg_rtx) == BLKmode)
18590 {
18591 int i, arg_regs;
18592 rtx reg;
18593
18594 /* This should really only occur when dealing with the hard-float
18595 ABI. */
18596 gcc_assert (TARGET_HARD_FLOAT_ABI);
18597
18598 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18599 {
18600 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18601 gcc_assert (REG_P (reg));
18602
18603 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18604
18605 /* If we are dealing with DF mode, make sure we don't
18606 clear either of the registers it addresses. */
18607 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18608 if (arg_regs > 1)
18609 {
18610 unsigned HOST_WIDE_INT mask;
18611 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18612 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18613 not_to_clear_mask |= mask;
18614 }
18615 }
18616 }
18617 else
18618 {
18619 /* Otherwise we can rely on the MODE to determine how many registers
18620 are being used by this argument. */
18621 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18622 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18623 if (arg_regs > 1)
18624 {
18625 unsigned HOST_WIDE_INT
18626 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18627 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18628 not_to_clear_mask |= mask;
18629 }
18630 }
18631 }
18632
18633 return not_to_clear_mask;
18634 }
18635
18636 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
18637 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18638 which registers are to be fully cleared, using the value in register
18639 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18640 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18641 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18642
18643 NOTE: one of three following assertions must hold:
18644 - SCRATCH_REG is a low register
18645 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18646 in TO_CLEAR_BITMAP)
18647 - CLEARING_REG is a low register. */
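/* As a rough sketch, with padding_bits_to_clear[0] == 0x0000ff00 the code
   below builds ~0x0000ff00 in SCRATCH_REG (the low half 0x00ff, then the
   upper half via a 16-bit insert) and ANDs it into r0, clearing just the
   padding bits; fully cleared registers are then handled with VSCCLRM/CLRM
   or plain moves, depending on the target.  */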
18648
18649 static void
18650 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18651 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18652 {
18653 bool saved_clearing = false;
18654 rtx saved_clearing_reg = NULL_RTX;
18655 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18656
18657 gcc_assert (arm_arch_cmse);
18658
18659 if (!bitmap_empty_p (to_clear_bitmap))
18660 {
18661 minregno = bitmap_first_set_bit (to_clear_bitmap);
18662 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18663 }
18664 clearing_regno = REGNO (clearing_reg);
18665
18666 /* Clear padding bits. */
18667 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18668 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18669 {
18670 uint64_t mask;
18671 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18672
18673 if (padding_bits_to_clear[i] == 0)
18674 continue;
18675
18676 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18677 CLEARING_REG as scratch. */
18678 if (TARGET_THUMB1
18679 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18680 {
18681 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18682 such that we can use clearing_reg to clear the unused bits in the
18683 arguments. */
18684 if ((clearing_regno > maxregno
18685 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18686 && !saved_clearing)
18687 {
18688 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18689 emit_move_insn (scratch_reg, clearing_reg);
18690 saved_clearing = true;
18691 saved_clearing_reg = scratch_reg;
18692 }
18693 scratch_reg = clearing_reg;
18694 }
18695
18696 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18697 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18698 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18699
18700 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18701 mask = (~padding_bits_to_clear[i]) >> 16;
18702 rtx16 = gen_int_mode (16, SImode);
18703 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18704 if (mask)
18705 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18706
18707 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18708 }
18709 if (saved_clearing)
18710 emit_move_insn (clearing_reg, saved_clearing_reg);
18711
18712
18713 /* Clear full registers. */
18714
18715 if (TARGET_HAVE_FPCXT_CMSE)
18716 {
18717 rtvec vunspec_vec;
18718 int i, j, k, nb_regs;
18719 rtx use_seq, par, reg, set, vunspec;
18720 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18721 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18722 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18723
18724 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18725 {
18726 /* Find next register to clear and exit if none. */
18727 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18728 if (i > maxregno)
18729 break;
18730
18731 /* Compute number of consecutive registers to clear. */
18732 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18733 j++);
18734 nb_regs = j - i;
18735
18736 /* Create VSCCLRM RTX pattern. */
18737 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18738 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18739 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18740 VUNSPEC_VSCCLRM_VPR);
18741 XVECEXP (par, 0, 0) = vunspec;
18742
18743 /* Insert VFP register clearing RTX in the pattern. */
18744 start_sequence ();
18745 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18746 {
18747 if (!bitmap_bit_p (to_clear_bitmap, j))
18748 continue;
18749
18750 reg = gen_rtx_REG (SFmode, j);
18751 set = gen_rtx_SET (reg, const0_rtx);
18752 XVECEXP (par, 0, k++) = set;
18753 emit_use (reg);
18754 }
18755 use_seq = get_insns ();
18756 end_sequence ();
18757
18758 emit_insn_after (use_seq, emit_insn (par));
18759 }
18760
18761 /* Get set of core registers to clear. */
18762 bitmap_clear (core_regs_bitmap);
18763 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18764 IP_REGNUM - R0_REGNUM + 1);
18765 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18766 core_regs_bitmap);
18767 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18768
18769 if (bitmap_empty_p (to_clear_core_bitmap))
18770 return;
18771
18772 /* Create clrm RTX pattern. */
18773 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18774 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18775
18776 /* Insert core register clearing RTX in the pattern. */
18777 start_sequence ();
18778 for (j = 0, i = minregno; j < nb_regs; i++)
18779 {
18780 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18781 continue;
18782
18783 reg = gen_rtx_REG (SImode, i);
18784 set = gen_rtx_SET (reg, const0_rtx);
18785 XVECEXP (par, 0, j++) = set;
18786 emit_use (reg);
18787 }
18788
18789 /* Insert APSR register clearing RTX in the pattern
18790 along with clobbering CC. */
18791 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18792 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18793 VUNSPEC_CLRM_APSR);
18794
18795 XVECEXP (par, 0, j++) = vunspec;
18796
18797 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18798 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18799 XVECEXP (par, 0, j) = clobber;
18800
18801 use_seq = get_insns ();
18802 end_sequence ();
18803
18804 emit_insn_after (use_seq, emit_insn (par));
18805 }
18806 else
18807 {
18808 /* If not marked for clearing, clearing_reg already does not contain
18809 any secret. */
18810 if (clearing_regno <= maxregno
18811 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18812 {
18813 emit_move_insn (clearing_reg, const0_rtx);
18814 emit_use (clearing_reg);
18815 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18816 }
18817
18818 for (regno = minregno; regno <= maxregno; regno++)
18819 {
18820 if (!bitmap_bit_p (to_clear_bitmap, regno))
18821 continue;
18822
18823 if (IS_VFP_REGNUM (regno))
18824 {
18825 /* If regno is an even vfp register and its successor is also to
18826 be cleared, use vmov. */
18827 if (TARGET_VFP_DOUBLE
18828 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18829 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18830 {
18831 emit_move_insn (gen_rtx_REG (DFmode, regno),
18832 CONST1_RTX (DFmode));
18833 emit_use (gen_rtx_REG (DFmode, regno));
18834 regno++;
18835 }
18836 else
18837 {
18838 emit_move_insn (gen_rtx_REG (SFmode, regno),
18839 CONST1_RTX (SFmode));
18840 emit_use (gen_rtx_REG (SFmode, regno));
18841 }
18842 }
18843 else
18844 {
18845 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18846 emit_use (gen_rtx_REG (SImode, regno));
18847 }
18848 }
18849 }
18850 }
18851
18852 /* Clear core and caller-saved VFP registers not used to pass arguments before
18853 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18854 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18855 libgcc/config/arm/cmse_nonsecure_call.S. */
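/* Very roughly, for a call through a function pointer held in, say, r4 that
   passes a single int in r0, the sequence inserted before the call clears
   the unused argument registers r1-r3 (and, with -mfloat-abi=hard, the
   caller-saved VFP argument registers), clears the LSB of the address
   register with an lsrs/lsls pair, and with FPCXT_CMSE also saves and later
   restores the callee-saved registers around the call.  */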
18856
18857 static void
18858 cmse_nonsecure_call_inline_register_clear (void)
18859 {
18860 basic_block bb;
18861
18862 FOR_EACH_BB_FN (bb, cfun)
18863 {
18864 rtx_insn *insn;
18865
18866 FOR_BB_INSNS (bb, insn)
18867 {
18868 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18869 /* frame = VFP regs + FPSCR + VPR. */
18870 unsigned lazy_store_stack_frame_size
18871 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18872 unsigned long callee_saved_mask
18873 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18874 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18875 unsigned address_regnum, regno;
18876 unsigned max_int_regno
18877 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18878 unsigned max_fp_regno
18879 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18880 unsigned maxregno
18881 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18882 auto_sbitmap to_clear_bitmap (maxregno + 1);
18883 rtx_insn *seq;
18884 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18885 rtx address;
18886 CUMULATIVE_ARGS args_so_far_v;
18887 cumulative_args_t args_so_far;
18888 tree arg_type, fntype;
18889 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18890 function_args_iterator args_iter;
18891 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18892
18893 if (!NONDEBUG_INSN_P (insn))
18894 continue;
18895
18896 if (!CALL_P (insn))
18897 continue;
18898
18899 pat = PATTERN (insn);
18900 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18901 call = XVECEXP (pat, 0, 0);
18902
18903 /* Get the real call RTX if the insn sets a value, ie. returns. */
18904 if (GET_CODE (call) == SET)
18905 call = SET_SRC (call);
18906
18907 /* Check if it is a cmse_nonsecure_call. */
18908 unspec = XEXP (call, 0);
18909 if (GET_CODE (unspec) != UNSPEC
18910 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18911 continue;
18912
18913 /* Mark registers that need to be cleared. Those that hold a
18914 parameter are removed from the set further below. */
18915 bitmap_clear (to_clear_bitmap);
18916 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18917 max_int_regno - R0_REGNUM + 1);
18918
18919 /* Only look at the caller-saved floating point registers in case of
18920 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18921 lazy store and loads which clear both caller- and callee-saved
18922 registers. */
18923 if (!lazy_fpclear)
18924 {
18925 auto_sbitmap float_bitmap (maxregno + 1);
18926
18927 bitmap_clear (float_bitmap);
18928 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18929 max_fp_regno - FIRST_VFP_REGNUM + 1);
18930 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18931 }
18932
18933 /* Make sure the register used to hold the function address is not
18934 cleared. */
18935 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18936 gcc_assert (MEM_P (address));
18937 gcc_assert (REG_P (XEXP (address, 0)));
18938 address_regnum = REGNO (XEXP (address, 0));
18939 if (address_regnum <= max_int_regno)
18940 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18941
18942 /* Set basic block of call insn so that df rescan is performed on
18943 insns inserted here. */
18944 set_block_for_insn (insn, bb);
18945 df_set_flags (DF_DEFER_INSN_RESCAN);
18946 start_sequence ();
18947
18948 /* Make sure the scheduler doesn't schedule other insns beyond
18949 here. */
18950 emit_insn (gen_blockage ());
18951
18952 /* Walk through all arguments and clear registers appropriately.
18953 */
18954 fntype = TREE_TYPE (MEM_EXPR (address));
18955 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18956 NULL_TREE);
18957 args_so_far = pack_cumulative_args (&args_so_far_v);
18958 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18959 {
18960 rtx arg_rtx;
18961 uint64_t to_clear_args_mask;
18962
18963 if (VOID_TYPE_P (arg_type))
18964 continue;
18965
18966 function_arg_info arg (arg_type, /*named=*/true);
18967 if (!first_param)
18968 /* ??? We should advance after processing the argument and pass
18969 the argument we're advancing past. */
18970 arm_function_arg_advance (args_so_far, arg);
18971
18972 arg_rtx = arm_function_arg (args_so_far, arg);
18973 gcc_assert (REG_P (arg_rtx));
18974 to_clear_args_mask
18975 = compute_not_to_clear_mask (arg_type, arg_rtx,
18976 REGNO (arg_rtx),
18977 &padding_bits_to_clear[0]);
18978 if (to_clear_args_mask)
18979 {
18980 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18981 {
18982 if (to_clear_args_mask & (1ULL << regno))
18983 bitmap_clear_bit (to_clear_bitmap, regno);
18984 }
18985 }
18986
18987 first_param = false;
18988 }
18989
18990 /* We use right shift and left shift to clear the LSB of the address
18991 we jump to instead of using bic, to avoid having to use an extra
18992 register on Thumb-1. */
18993 clearing_reg = XEXP (address, 0);
18994 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18995 emit_insn (gen_rtx_SET (clearing_reg, shift));
18996 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18997 emit_insn (gen_rtx_SET (clearing_reg, shift));
18998
18999 if (clear_callee_saved)
19000 {
19001 rtx push_insn =
19002 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19003 /* Disable frame debug info in push because it needs to be
19004 disabled for pop (see below). */
19005 RTX_FRAME_RELATED_P (push_insn) = 0;
19006
19007 /* Lazy store multiple. */
19008 if (lazy_fpclear)
19009 {
19010 rtx imm;
19011 rtx_insn *add_insn;
19012
19013 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19014 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19015 stack_pointer_rtx, imm));
19016 /* If we have the frame pointer, then it will be the
19017 CFA reg. Otherwise, the stack pointer is the CFA
19018 reg, so we need to emit a CFA adjust. */
19019 if (!frame_pointer_needed)
19020 arm_add_cfa_adjust_cfa_note (add_insn,
19021 - lazy_store_stack_frame_size,
19022 stack_pointer_rtx,
19023 stack_pointer_rtx);
19024 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19025 }
19026 /* Save VFP callee-saved registers. */
19027 else
19028 {
19029 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19030 (max_fp_regno - D7_VFP_REGNUM) / 2);
19031 /* Disable frame debug info in push because it needs to be
19032 disabled for vpop (see below). */
19033 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19034 }
19035 }
19036
19037 /* Clear caller-saved registers that leak before doing a non-secure
19038 call. */
19039 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19040 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19041 NUM_ARG_REGS, ip_reg, clearing_reg);
19042
19043 seq = get_insns ();
19044 end_sequence ();
19045 emit_insn_before (seq, insn);
19046
19047 if (TARGET_HAVE_FPCXT_CMSE)
19048 {
19049 rtx_insn *last, *pop_insn, *after = insn;
19050
19051 start_sequence ();
19052
19053 /* Lazy load multiple done as part of libcall in Armv8-M. */
19054 if (lazy_fpclear)
19055 {
19056 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19057 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19058 rtx_insn *add_insn =
19059 emit_insn (gen_addsi3 (stack_pointer_rtx,
19060 stack_pointer_rtx, imm));
19061 if (!frame_pointer_needed)
19062 arm_add_cfa_adjust_cfa_note (add_insn,
19063 lazy_store_stack_frame_size,
19064 stack_pointer_rtx,
19065 stack_pointer_rtx);
19066 }
19067 /* Restore VFP callee-saved registers. */
19068 else
19069 {
19070 int nb_callee_saved_vfp_regs =
19071 (max_fp_regno - D7_VFP_REGNUM) / 2;
19072 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19073 nb_callee_saved_vfp_regs,
19074 stack_pointer_rtx);
19075 /* Disable frame debug info in vpop because the SP adjustment
19076 is made using a CFA adjustment note while CFA used is
19077 sometimes R7. This then causes an assert failure in the
19078 CFI note creation code. */
19079 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19080 }
19081
19082 arm_emit_multi_reg_pop (callee_saved_mask);
19083 pop_insn = get_last_insn ();
19084
19085 /* Disable frame debug info in pop because the notes reset the state
19086 of popped registers to what it was at the beginning of the
19087 function, before the prologue. This leads to incorrect state
19088 when doing the pop after the nonsecure call for registers that
19089 are pushed both in prologue and before the nonsecure call.
19090
19091 It also occasionally triggers an assert failure in CFI note
19092 creation code when there are two codepaths to the epilogue,
19093 one of which does not go through the nonsecure call.
19094 Obviously this means that debugging between the push and pop is
19095 not reliable. */
19096 RTX_FRAME_RELATED_P (pop_insn) = 0;
19097
19098 seq = get_insns ();
19099 last = get_last_insn ();
19100 end_sequence ();
19101
19102 emit_insn_after (seq, after);
19103
19104 /* Skip the pop we have just inserted after the nonsecure call; we
19105 know it does not contain a nonsecure call. */
19106 insn = last;
19107 }
19108 }
19109 }
19110 }
19111
19112 /* Rewrite move insn into subtract of 0 if the condition codes will
19113 be useful in the next conditional jump insn. */
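/* For example (sketch): a Thumb-1 sequence such as
     movs r3, r0 ... cmp r3, #0 ; bne .L1
   has its move rewritten as "subs r3, r0, #0"; the subtract already sets
   the condition codes, so later passes can drop the explicit compare.  */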
19114
19115 static void
19116 thumb1_reorg (void)
19117 {
19118 basic_block bb;
19119
19120 FOR_EACH_BB_FN (bb, cfun)
19121 {
19122 rtx dest, src;
19123 rtx cmp, op0, op1, set = NULL;
19124 rtx_insn *prev, *insn = BB_END (bb);
19125 bool insn_clobbered = false;
19126
19127 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19128 insn = PREV_INSN (insn);
19129
19130 /* Find the last cbranchsi4_insn in basic block BB. */
19131 if (insn == BB_HEAD (bb)
19132 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19133 continue;
19134
19135 /* Get the register with which we are comparing. */
19136 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19137 op0 = XEXP (cmp, 0);
19138 op1 = XEXP (cmp, 1);
19139
19140 /* Check that comparison is against ZERO. */
19141 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19142 continue;
19143
19144 /* Find the first flag setting insn before INSN in basic block BB. */
19145 gcc_assert (insn != BB_HEAD (bb));
19146 for (prev = PREV_INSN (insn);
19147 (!insn_clobbered
19148 && prev != BB_HEAD (bb)
19149 && (NOTE_P (prev)
19150 || DEBUG_INSN_P (prev)
19151 || ((set = single_set (prev)) != NULL
19152 && get_attr_conds (prev) == CONDS_NOCOND)));
19153 prev = PREV_INSN (prev))
19154 {
19155 if (reg_set_p (op0, prev))
19156 insn_clobbered = true;
19157 }
19158
19159 /* Skip if op0 is clobbered by an insn other than prev. */
19160 if (insn_clobbered)
19161 continue;
19162
19163 if (!set)
19164 continue;
19165
19166 dest = SET_DEST (set);
19167 src = SET_SRC (set);
19168 if (!low_register_operand (dest, SImode)
19169 || !low_register_operand (src, SImode))
19170 continue;
19171
19172 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19173 in INSN. Both src and dest of the move insn are checked. */
19174 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19175 {
19176 dest = copy_rtx (dest);
19177 src = copy_rtx (src);
19178 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19179 PATTERN (prev) = gen_rtx_SET (dest, src);
19180 INSN_CODE (prev) = -1;
19181 /* Set test register in INSN to dest. */
19182 XEXP (cmp, 0) = copy_rtx (dest);
19183 INSN_CODE (insn) = -1;
19184 }
19185 }
19186 }
19187
19188 /* Convert instructions to their cc-clobbering variant if possible, since
19189 that allows us to use smaller encodings. */
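/* For example, "add r0, r1, r2" needs a 32-bit encoding in Thumb-2, while
   the flag-setting "adds r0, r1, r2" fits in 16 bits, so the conversion is
   done whenever the condition codes are known to be dead at that point.  */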
19190
19191 static void
19192 thumb2_reorg (void)
19193 {
19194 basic_block bb;
19195 regset_head live;
19196
19197 INIT_REG_SET (&live);
19198
19199 /* We are freeing block_for_insn in the toplev to keep compatibility
19200 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19201 compute_bb_for_insn ();
19202 df_analyze ();
19203
19204 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19205
19206 FOR_EACH_BB_FN (bb, cfun)
19207 {
19208 if ((current_tune->disparage_flag_setting_t16_encodings
19209 == tune_params::DISPARAGE_FLAGS_ALL)
19210 && optimize_bb_for_speed_p (bb))
19211 continue;
19212
19213 rtx_insn *insn;
19214 Convert_Action action = SKIP;
19215 Convert_Action action_for_partial_flag_setting
19216 = ((current_tune->disparage_flag_setting_t16_encodings
19217 != tune_params::DISPARAGE_FLAGS_NEITHER)
19218 && optimize_bb_for_speed_p (bb))
19219 ? SKIP : CONV;
19220
19221 COPY_REG_SET (&live, DF_LR_OUT (bb));
19222 df_simulate_initialize_backwards (bb, &live);
19223 FOR_BB_INSNS_REVERSE (bb, insn)
19224 {
19225 if (NONJUMP_INSN_P (insn)
19226 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19227 && GET_CODE (PATTERN (insn)) == SET)
19228 {
19229 action = SKIP;
19230 rtx pat = PATTERN (insn);
19231 rtx dst = XEXP (pat, 0);
19232 rtx src = XEXP (pat, 1);
19233 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19234
19235 if (UNARY_P (src) || BINARY_P (src))
19236 op0 = XEXP (src, 0);
19237
19238 if (BINARY_P (src))
19239 op1 = XEXP (src, 1);
19240
19241 if (low_register_operand (dst, SImode))
19242 {
19243 switch (GET_CODE (src))
19244 {
19245 case PLUS:
19246 /* Adding two registers and storing the result
19247 in the first source is already a 16-bit
19248 operation. */
19249 if (rtx_equal_p (dst, op0)
19250 && register_operand (op1, SImode))
19251 break;
19252
19253 if (low_register_operand (op0, SImode))
19254 {
19255 /* ADDS <Rd>,<Rn>,<Rm> */
19256 if (low_register_operand (op1, SImode))
19257 action = CONV;
19258 /* ADDS <Rdn>,#<imm8> */
19259 /* SUBS <Rdn>,#<imm8> */
19260 else if (rtx_equal_p (dst, op0)
19261 && CONST_INT_P (op1)
19262 && IN_RANGE (INTVAL (op1), -255, 255))
19263 action = CONV;
19264 /* ADDS <Rd>,<Rn>,#<imm3> */
19265 /* SUBS <Rd>,<Rn>,#<imm3> */
19266 else if (CONST_INT_P (op1)
19267 && IN_RANGE (INTVAL (op1), -7, 7))
19268 action = CONV;
19269 }
19270 /* ADCS <Rd>, <Rn> */
19271 else if (GET_CODE (XEXP (src, 0)) == PLUS
19272 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19273 && low_register_operand (XEXP (XEXP (src, 0), 1),
19274 SImode)
19275 && COMPARISON_P (op1)
19276 && cc_register (XEXP (op1, 0), VOIDmode)
19277 && maybe_get_arm_condition_code (op1) == ARM_CS
19278 && XEXP (op1, 1) == const0_rtx)
19279 action = CONV;
19280 break;
19281
19282 case MINUS:
19283 /* RSBS <Rd>,<Rn>,#0
19284 Not handled here: see NEG below. */
19285 /* SUBS <Rd>,<Rn>,#<imm3>
19286 SUBS <Rdn>,#<imm8>
19287 Not handled here: see PLUS above. */
19288 /* SUBS <Rd>,<Rn>,<Rm> */
19289 if (low_register_operand (op0, SImode)
19290 && low_register_operand (op1, SImode))
19291 action = CONV;
19292 break;
19293
19294 case MULT:
19295 /* MULS <Rdm>,<Rn>,<Rdm>
19296 As an exception to the rule, this is only used
19297 when optimizing for size since MULS is slow on all
19298 known implementations. We do not even want to use
19299 MULS in cold code, if optimizing for speed, so we
19300 test the global flag here. */
19301 if (!optimize_size)
19302 break;
19303 /* Fall through. */
19304 case AND:
19305 case IOR:
19306 case XOR:
19307 /* ANDS <Rdn>,<Rm> */
19308 if (rtx_equal_p (dst, op0)
19309 && low_register_operand (op1, SImode))
19310 action = action_for_partial_flag_setting;
19311 else if (rtx_equal_p (dst, op1)
19312 && low_register_operand (op0, SImode))
19313 action = action_for_partial_flag_setting == SKIP
19314 ? SKIP : SWAP_CONV;
19315 break;
19316
19317 case ASHIFTRT:
19318 case ASHIFT:
19319 case LSHIFTRT:
19320 /* ASRS <Rdn>,<Rm> */
19321 /* LSRS <Rdn>,<Rm> */
19322 /* LSLS <Rdn>,<Rm> */
19323 if (rtx_equal_p (dst, op0)
19324 && low_register_operand (op1, SImode))
19325 action = action_for_partial_flag_setting;
19326 /* ASRS <Rd>,<Rm>,#<imm5> */
19327 /* LSRS <Rd>,<Rm>,#<imm5> */
19328 /* LSLS <Rd>,<Rm>,#<imm5> */
19329 else if (low_register_operand (op0, SImode)
19330 && CONST_INT_P (op1)
19331 && IN_RANGE (INTVAL (op1), 0, 31))
19332 action = action_for_partial_flag_setting;
19333 break;
19334
19335 case ROTATERT:
19336 /* RORS <Rdn>,<Rm> */
19337 if (rtx_equal_p (dst, op0)
19338 && low_register_operand (op1, SImode))
19339 action = action_for_partial_flag_setting;
19340 break;
19341
19342 case NOT:
19343 /* MVNS <Rd>,<Rm> */
19344 if (low_register_operand (op0, SImode))
19345 action = action_for_partial_flag_setting;
19346 break;
19347
19348 case NEG:
19349 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19350 if (low_register_operand (op0, SImode))
19351 action = CONV;
19352 break;
19353
19354 case CONST_INT:
19355 /* MOVS <Rd>,#<imm8> */
19356 if (CONST_INT_P (src)
19357 && IN_RANGE (INTVAL (src), 0, 255))
19358 action = action_for_partial_flag_setting;
19359 break;
19360
19361 case REG:
19362 /* MOVS and MOV<c> with registers have different
19363 encodings, so are not relevant here. */
19364 break;
19365
19366 default:
19367 break;
19368 }
19369 }
19370
19371 if (action != SKIP)
19372 {
19373 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19374 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19375 rtvec vec;
19376
19377 if (action == SWAP_CONV)
19378 {
19379 src = copy_rtx (src);
19380 XEXP (src, 0) = op1;
19381 XEXP (src, 1) = op0;
19382 pat = gen_rtx_SET (dst, src);
19383 vec = gen_rtvec (2, pat, clobber);
19384 }
19385 else /* action == CONV */
19386 vec = gen_rtvec (2, pat, clobber);
19387
19388 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19389 INSN_CODE (insn) = -1;
19390 }
19391 }
19392
19393 if (NONDEBUG_INSN_P (insn))
19394 df_simulate_one_insn_backwards (bb, insn, &live);
19395 }
19396 }
19397
19398 CLEAR_REG_SET (&live);
19399 }
19400
19401 /* GCC puts the pool in the wrong place for ARM, since we can only
19402 load addresses a limited distance around the pc. We do some
19403 special munging to move the constant pool values to the correct
19404 point in the code. */
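/* For instance, an ARM-state "ldr rN, [pc, #offset]" can only reach a
   literal within roughly 4K bytes of the instruction (much less on
   Thumb-1), so a large function may need several pools, each one placed
   just before the loads that reference it would go out of range.  */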
19405 static void
19406 arm_reorg (void)
19407 {
19408 rtx_insn *insn;
19409 HOST_WIDE_INT address = 0;
19410 Mfix * fix;
19411
19412 if (use_cmse)
19413 cmse_nonsecure_call_inline_register_clear ();
19414
19415 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19416 if (cfun->is_thunk)
19417 ;
19418 else if (TARGET_THUMB1)
19419 thumb1_reorg ();
19420 else if (TARGET_THUMB2)
19421 thumb2_reorg ();
19422
19423 /* Ensure all insns that must be split have been split at this point.
19424 Otherwise, the pool placement code below may compute incorrect
19425 insn lengths. Note that when optimizing, all insns have already
19426 been split at this point. */
19427 if (!optimize)
19428 split_all_insns_noflow ();
19429
19430 /* When literal pools are disabled it should no longer be necessary to
19431 create any, so make sure we do not attempt to create one. */
19432 if (arm_disable_literal_pool)
19433 return ;
19434
19435 minipool_fix_head = minipool_fix_tail = NULL;
19436
19437 /* The first insn must always be a note, or the code below won't
19438 scan it properly. */
19439 insn = get_insns ();
19440 gcc_assert (NOTE_P (insn));
19441 minipool_pad = 0;
19442
19443 /* Scan all the insns and record the operands that will need fixing. */
19444 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19445 {
19446 if (BARRIER_P (insn))
19447 push_minipool_barrier (insn, address);
19448 else if (INSN_P (insn))
19449 {
19450 rtx_jump_table_data *table;
19451
19452 note_invalid_constants (insn, address, true);
19453 address += get_attr_length (insn);
19454
19455 /* If the insn is a vector jump, add the size of the table
19456 and skip the table. */
19457 if (tablejump_p (insn, NULL, &table))
19458 {
19459 address += get_jump_table_size (table);
19460 insn = table;
19461 }
19462 }
19463 else if (LABEL_P (insn))
19464 /* Add the worst-case padding due to alignment. We don't add
19465 the _current_ padding because the minipool insertions
19466 themselves might change it. */
19467 address += get_label_padding (insn);
19468 }
19469
19470 fix = minipool_fix_head;
19471
19472 /* Now scan the fixups and perform the required changes. */
19473 while (fix)
19474 {
19475 Mfix * ftmp;
19476 Mfix * fdel;
19477 Mfix * last_added_fix;
19478 Mfix * last_barrier = NULL;
19479 Mfix * this_fix;
19480
19481 /* Skip any further barriers before the next fix. */
19482 while (fix && BARRIER_P (fix->insn))
19483 fix = fix->next;
19484
19485 /* No more fixes. */
19486 if (fix == NULL)
19487 break;
19488
19489 last_added_fix = NULL;
19490
19491 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19492 {
19493 if (BARRIER_P (ftmp->insn))
19494 {
19495 if (ftmp->address >= minipool_vector_head->max_address)
19496 break;
19497
19498 last_barrier = ftmp;
19499 }
19500 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19501 break;
19502
19503 last_added_fix = ftmp; /* Keep track of the last fix added. */
19504 }
19505
19506 /* If we found a barrier, drop back to that; any fixes that we
19507 could have reached but come after the barrier will now go in
19508 the next mini-pool. */
19509 if (last_barrier != NULL)
19510 {
19511 /* Reduce the refcount for those fixes that won't go into this
19512 pool after all. */
19513 for (fdel = last_barrier->next;
19514 fdel && fdel != ftmp;
19515 fdel = fdel->next)
19516 {
19517 fdel->minipool->refcount--;
19518 fdel->minipool = NULL;
19519 }
19520
19521 ftmp = last_barrier;
19522 }
19523 else
19524 {
19525 /* ftmp is the first fix that we can't fit into this pool and
19526 there are no natural barriers that we could use. Insert a
19527 new barrier in the code somewhere between the previous
19528 fix and this one, and arrange to jump around it. */
19529 HOST_WIDE_INT max_address;
19530
19531 /* The last item on the list of fixes must be a barrier, so
19532 we can never run off the end of the list of fixes without
19533 last_barrier being set. */
19534 gcc_assert (ftmp);
19535
19536 max_address = minipool_vector_head->max_address;
19537 /* Check that there isn't another fix that is in range that
19538 we couldn't fit into this pool because the pool was
19539 already too large: we need to put the pool before such an
19540 instruction. The pool itself may come just after the
19541 fix because create_fix_barrier also allows space for a
19542 jump instruction. */
19543 if (ftmp->address < max_address)
19544 max_address = ftmp->address + 1;
19545
19546 last_barrier = create_fix_barrier (last_added_fix, max_address);
19547 }
19548
19549 assign_minipool_offsets (last_barrier);
19550
19551 while (ftmp)
19552 {
19553 if (!BARRIER_P (ftmp->insn)
19554 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19555 == NULL))
19556 break;
19557
19558 ftmp = ftmp->next;
19559 }
19560
19561 /* Scan over the fixes we have identified for this pool, fixing them
19562 up and adding the constants to the pool itself. */
19563 for (this_fix = fix; this_fix && ftmp != this_fix;
19564 this_fix = this_fix->next)
19565 if (!BARRIER_P (this_fix->insn))
19566 {
19567 rtx addr
19568 = plus_constant (Pmode,
19569 gen_rtx_LABEL_REF (VOIDmode,
19570 minipool_vector_label),
19571 this_fix->minipool->offset);
19572 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19573 }
19574
19575 dump_minipool (last_barrier->insn);
19576 fix = ftmp;
19577 }
19578
19579 /* From now on we must synthesize any constants that we can't handle
19580 directly. This can happen if the RTL gets split during final
19581 instruction generation. */
19582 cfun->machine->after_arm_reorg = 1;
19583
19584 /* Free the minipool memory. */
19585 obstack_free (&minipool_obstack, minipool_startobj);
19586 }
19587 \f
19588 /* Routines to output assembly language. */
19589
19590 /* Return the string representation of the passed-in real value. */
19591 static const char *
19592 fp_const_from_val (REAL_VALUE_TYPE *r)
19593 {
19594 if (!fp_consts_inited)
19595 init_fp_table ();
19596
19597 gcc_assert (real_equal (r, &value_fp0));
19598 return "0";
19599 }
19600
19601 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19602 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19603 is in the list, and UPDATE is true iff the list contains an explicit
19604 update of the base register. */
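/* For example (illustrative register choices): popping {r4, r5, pc}
   through SP with an update prints "pop {r4, r5, pc}", the same set
   loaded through r3 without writeback prints "ldm r3, {r4, r5, pc}",
   and a return from an interrupt handler gets a trailing "^" so that
   SPSR is restored as well. */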
19605 void
19606 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19607 bool update)
19608 {
19609 int i;
19610 char pattern[100];
19611 int offset;
19612 const char *conditional;
19613 int num_saves = XVECLEN (operands[0], 0);
19614 unsigned int regno;
19615 unsigned int regno_base = REGNO (operands[1]);
19616 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19617
19618 offset = 0;
19619 offset += update ? 1 : 0;
19620 offset += return_pc ? 1 : 0;
19621
19622 /* Is the base register in the list? */
19623 for (i = offset; i < num_saves; i++)
19624 {
19625 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19626 /* If SP is in the list, then the base register must be SP. */
19627 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19628 /* If base register is in the list, there must be no explicit update. */
19629 if (regno == regno_base)
19630 gcc_assert (!update);
19631 }
19632
19633 conditional = reverse ? "%?%D0" : "%?%d0";
19634 /* Can't use POP if returning from an interrupt. */
19635 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19636 sprintf (pattern, "pop%s\t{", conditional);
19637 else
19638 {
19639 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19640 It's just a convention; their semantics are identical.
19641 if (regno_base == SP_REGNUM)
19642 sprintf (pattern, "ldmfd%s\t", conditional);
19643 else if (update)
19644 sprintf (pattern, "ldmia%s\t", conditional);
19645 else
19646 sprintf (pattern, "ldm%s\t", conditional);
19647
19648 strcat (pattern, reg_names[regno_base]);
19649 if (update)
19650 strcat (pattern, "!, {");
19651 else
19652 strcat (pattern, ", {");
19653 }
19654
19655 /* Output the first destination register. */
19656 strcat (pattern,
19657 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19658
19659 /* Output the rest of the destination registers. */
19660 for (i = offset + 1; i < num_saves; i++)
19661 {
19662 strcat (pattern, ", ");
19663 strcat (pattern,
19664 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19665 }
19666
19667 strcat (pattern, "}");
19668
19669 if (interrupt_p && return_pc)
19670 strcat (pattern, "^");
19671
19672 output_asm_insn (pattern, &cond);
19673 }
19674
19675
19676 /* Output the assembly for a store multiple. */
19677
19678 const char *
19679 vfp_output_vstmd (rtx * operands)
19680 {
19681 char pattern[100];
19682 int p;
19683 int base;
19684 int i;
19685 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19686 ? XEXP (operands[0], 0)
19687 : XEXP (XEXP (operands[0], 0), 0);
19688 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19689
19690 if (push_p)
19691 strcpy (pattern, "vpush%?.64\t{%P1");
19692 else
19693 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19694
19695 p = strlen (pattern);
19696
19697 gcc_assert (REG_P (operands[1]));
19698
19699 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19700 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19701 {
19702 p += sprintf (&pattern[p], ", d%d", base + i);
19703 }
19704 strcpy (&pattern[p], "}");
19705
19706 output_asm_insn (pattern, operands);
19707 return "";
19708 }
19709
19710
19711 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
19712 number of bytes pushed. */
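/* For example (a sketch of the expected behaviour rather than a
   guarantee about the exact RTL): saving d8-d11 corresponds to
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 4), which emits one
   frame-related PARALLEL describing the equivalent of
   "vpush {d8-d11}", attaches a REG_FRAME_RELATED_EXPR note for the
   unwinder, and returns 32 (4 registers * 8 bytes). */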
19713
19714 static int
19715 vfp_emit_fstmd (int base_reg, int count)
19716 {
19717 rtx par;
19718 rtx dwarf;
19719 rtx tmp, reg;
19720 int i;
19721
19722 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19723 register pairs are stored by a store multiple insn. We avoid this
19724 by pushing an extra pair. */
19725 if (count == 2 && !arm_arch6)
19726 {
19727 if (base_reg == LAST_VFP_REGNUM - 3)
19728 base_reg -= 2;
19729 count++;
19730 }
19731
19732 /* FSTMD may not store more than 16 doubleword registers at once. Split
19733 larger stores into multiple parts (up to a maximum of two, in
19734 practice). */
19735 if (count > 16)
19736 {
19737 int saved;
19738 /* NOTE: base_reg is an internal register number, so each D register
19739 counts as 2. */
19740 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19741 saved += vfp_emit_fstmd (base_reg, 16);
19742 return saved;
19743 }
19744
19745 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19746 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19747
19748 reg = gen_rtx_REG (DFmode, base_reg);
19749 base_reg += 2;
19750
19751 XVECEXP (par, 0, 0)
19752 = gen_rtx_SET (gen_frame_mem
19753 (BLKmode,
19754 gen_rtx_PRE_MODIFY (Pmode,
19755 stack_pointer_rtx,
19756 plus_constant
19757 (Pmode, stack_pointer_rtx,
19758 - (count * 8)))
19759 ),
19760 gen_rtx_UNSPEC (BLKmode,
19761 gen_rtvec (1, reg),
19762 UNSPEC_PUSH_MULT));
19763
19764 tmp = gen_rtx_SET (stack_pointer_rtx,
19765 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19766 RTX_FRAME_RELATED_P (tmp) = 1;
19767 XVECEXP (dwarf, 0, 0) = tmp;
19768
19769 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19770 RTX_FRAME_RELATED_P (tmp) = 1;
19771 XVECEXP (dwarf, 0, 1) = tmp;
19772
19773 for (i = 1; i < count; i++)
19774 {
19775 reg = gen_rtx_REG (DFmode, base_reg);
19776 base_reg += 2;
19777 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19778
19779 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19780 plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 i * 8)),
19783 reg);
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, i + 1) = tmp;
19786 }
19787
19788 par = emit_insn (par);
19789 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19790 RTX_FRAME_RELATED_P (par) = 1;
19791
19792 return count * 8;
19793 }
19794
19795 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19796 has the cmse_nonsecure_call attribute; return false otherwise. */
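/* A typical (purely illustrative) trigger is a call through a
   non-secure function pointer declared with the ACLE attribute:

       typedef void __attribute__((cmse_nonsecure_call)) ns_fn (void);
       ns_fn *callback;
       ...
       callback ();    // ADDR for this call carries the attribute

   The names ns_fn and callback are hypothetical. */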
19797
19798 bool
19799 detect_cmse_nonsecure_call (tree addr)
19800 {
19801 if (!addr)
19802 return FALSE;
19803
19804 tree fntype = TREE_TYPE (addr);
19805 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19806 TYPE_ATTRIBUTES (fntype)))
19807 return TRUE;
19808 return FALSE;
19809 }
19810
19811
19812 /* Emit a call instruction with pattern PAT. ADDR is the address of
19813 the call target. */
19814
19815 void
19816 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19817 {
19818 rtx insn;
19819
19820 insn = emit_call_insn (pat);
19821
19822 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19823 If the call might use such an entry, add a use of the PIC register
19824 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19825 if (TARGET_VXWORKS_RTP
19826 && flag_pic
19827 && !sibcall
19828 && SYMBOL_REF_P (addr)
19829 && (SYMBOL_REF_DECL (addr)
19830 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19831 : !SYMBOL_REF_LOCAL_P (addr)))
19832 {
19833 require_pic_register (NULL_RTX, false /*compute_now*/);
19834 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19835 }
19836
19837 if (TARGET_FDPIC)
19838 {
19839 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19840 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19841 }
19842
19843 if (TARGET_AAPCS_BASED)
19844 {
19845 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19846 linker. We need to add an IP clobber to allow setting
19847 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19848 is not needed since it's a fixed register. */
19849 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19850 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19851 }
19852 }
19853
19854 /* Output a 'call' insn. */
19855 const char *
19856 output_call (rtx *operands)
19857 {
19858 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19859
19860 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19861 if (REGNO (operands[0]) == LR_REGNUM)
19862 {
19863 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19864 output_asm_insn ("mov%?\t%0, %|lr", operands);
19865 }
19866
19867 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19868
19869 if (TARGET_INTERWORK || arm_arch4t)
19870 output_asm_insn ("bx%?\t%0", operands);
19871 else
19872 output_asm_insn ("mov%?\t%|pc, %0", operands);
19873
19874 return "";
19875 }
19876
19877 /* Output a move from arm registers to arm registers of a long double.
19878 OPERANDS[0] is the destination.
19879 OPERANDS[1] is the source. */
19880 const char *
19881 output_mov_long_double_arm_from_arm (rtx *operands)
19882 {
19883 /* We have to be careful here because the two might overlap. */
19884 int dest_start = REGNO (operands[0]);
19885 int src_start = REGNO (operands[1]);
19886 rtx ops[2];
19887 int i;
19888
19889 if (dest_start < src_start)
19890 {
19891 for (i = 0; i < 3; i++)
19892 {
19893 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19894 ops[1] = gen_rtx_REG (SImode, src_start + i);
19895 output_asm_insn ("mov%?\t%0, %1", ops);
19896 }
19897 }
19898 else
19899 {
19900 for (i = 2; i >= 0; i--)
19901 {
19902 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19903 ops[1] = gen_rtx_REG (SImode, src_start + i);
19904 output_asm_insn ("mov%?\t%0, %1", ops);
19905 }
19906 }
19907
19908 return "";
19909 }
19910
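/* Emit a lower/upper half pair of sets that load the SImode value SRC
   into DEST.  As a rough illustration (the final insns are chosen by
   the movsi patterns), a constant such as 0x12345678 becomes a set of
   the low 16 bits followed by a ZERO_EXTRACT set of the high 16 bits,
   which is expected to end up as a movw/movt pair. */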
19911 void
19912 arm_emit_movpair (rtx dest, rtx src)
19913 {
19914 /* If the src is an immediate, simplify it. */
19915 if (CONST_INT_P (src))
19916 {
19917 HOST_WIDE_INT val = INTVAL (src);
19918 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19919 if ((val >> 16) & 0x0000ffff)
19920 {
19921 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19922 GEN_INT (16)),
19923 GEN_INT ((val >> 16) & 0x0000ffff));
19924 rtx_insn *insn = get_last_insn ();
19925 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19926 }
19927 return;
19928 }
19929 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19930 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19931 rtx_insn *insn = get_last_insn ();
19932 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19933 }
19934
19935 /* Output a move between double words. It must be REG<-MEM
19936 or MEM<-REG. */
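/* For example (illustrative register numbers), a DImode load from a
   plain register address prints as "ldrd r0, [r2]" when LDRD can be
   used and as "ldmia r2, {r0, r1}" otherwise. */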
19937 const char *
19938 output_move_double (rtx *operands, bool emit, int *count)
19939 {
19940 enum rtx_code code0 = GET_CODE (operands[0]);
19941 enum rtx_code code1 = GET_CODE (operands[1]);
19942 rtx otherops[3];
19943 if (count)
19944 *count = 1;
19945
19946 /* The only case when this might happen is when
19947 you are looking at the length of a DImode instruction
19948 that has an invalid constant in it. */
19949 if (code0 == REG && code1 != MEM)
19950 {
19951 gcc_assert (!emit);
19952 *count = 2;
19953 return "";
19954 }
19955
19956 if (code0 == REG)
19957 {
19958 unsigned int reg0 = REGNO (operands[0]);
19959 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19960
19961 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19962
19963 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19964
19965 switch (GET_CODE (XEXP (operands[1], 0)))
19966 {
19967 case REG:
19968
19969 if (emit)
19970 {
19971 if (can_ldrd
19972 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19973 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19974 else
19975 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19976 }
19977 break;
19978
19979 case PRE_INC:
19980 gcc_assert (can_ldrd);
19981 if (emit)
19982 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19983 break;
19984
19985 case PRE_DEC:
19986 if (emit)
19987 {
19988 if (can_ldrd)
19989 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19990 else
19991 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19992 }
19993 break;
19994
19995 case POST_INC:
19996 if (emit)
19997 {
19998 if (can_ldrd)
19999 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20000 else
20001 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20002 }
20003 break;
20004
20005 case POST_DEC:
20006 gcc_assert (can_ldrd);
20007 if (emit)
20008 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20009 break;
20010
20011 case PRE_MODIFY:
20012 case POST_MODIFY:
20013 /* Autoincrement addressing modes should never have overlapping
20014 base and destination registers, and overlapping index registers
20015 are already prohibited, so this doesn't need to worry about
20016 fix_cm3_ldrd. */
20017 otherops[0] = operands[0];
20018 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20019 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20020
20021 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20022 {
20023 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20024 {
20025 /* Registers overlap so split out the increment. */
20026 if (emit)
20027 {
20028 gcc_assert (can_ldrd);
20029 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20030 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20031 }
20032 if (count)
20033 *count = 2;
20034 }
20035 else
20036 {
20037 /* Use a single insn if we can.
20038 FIXME: IWMMXT allows offsets larger than ldrd can
20039 handle; fix these up with a pair of ldr. */
20040 if (can_ldrd
20041 && (TARGET_THUMB2
20042 || !CONST_INT_P (otherops[2])
20043 || (INTVAL (otherops[2]) > -256
20044 && INTVAL (otherops[2]) < 256)))
20045 {
20046 if (emit)
20047 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20048 }
20049 else
20050 {
20051 if (emit)
20052 {
20053 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20054 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20055 }
20056 if (count)
20057 *count = 2;
20058
20059 }
20060 }
20061 }
20062 else
20063 {
20064 /* Use a single insn if we can.
20065 FIXME: IWMMXT allows offsets larger than ldrd can handle;
20066 fix these up with a pair of ldr. */
20067 if (can_ldrd
20068 && (TARGET_THUMB2
20069 || !CONST_INT_P (otherops[2])
20070 || (INTVAL (otherops[2]) > -256
20071 && INTVAL (otherops[2]) < 256)))
20072 {
20073 if (emit)
20074 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20075 }
20076 else
20077 {
20078 if (emit)
20079 {
20080 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20081 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20082 }
20083 if (count)
20084 *count = 2;
20085 }
20086 }
20087 break;
20088
20089 case LABEL_REF:
20090 case CONST:
20091 /* We might be able to use ldrd %0, %1 here. However, the range
20092 differs from that of ldr/adr, and it is broken on some ARMv7-M
20093 implementations. */
20094 /* Use the second register of the pair to avoid problematic
20095 overlap. */
20096 otherops[1] = operands[1];
20097 if (emit)
20098 output_asm_insn ("adr%?\t%0, %1", otherops);
20099 operands[1] = otherops[0];
20100 if (emit)
20101 {
20102 if (can_ldrd)
20103 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20104 else
20105 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20106 }
20107
20108 if (count)
20109 *count = 2;
20110 break;
20111
20112 /* ??? This needs checking for thumb2. */
20113 default:
20114 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20115 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20116 {
20117 otherops[0] = operands[0];
20118 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20119 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20120
20121 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20122 {
20123 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20124 {
20125 switch ((int) INTVAL (otherops[2]))
20126 {
20127 case -8:
20128 if (emit)
20129 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20130 return "";
20131 case -4:
20132 if (TARGET_THUMB2)
20133 break;
20134 if (emit)
20135 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20136 return "";
20137 case 4:
20138 if (TARGET_THUMB2)
20139 break;
20140 if (emit)
20141 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20142 return "";
20143 }
20144 }
20145 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20146 operands[1] = otherops[0];
20147 if (can_ldrd
20148 && (REG_P (otherops[2])
20149 || TARGET_THUMB2
20150 || (CONST_INT_P (otherops[2])
20151 && INTVAL (otherops[2]) > -256
20152 && INTVAL (otherops[2]) < 256)))
20153 {
20154 if (reg_overlap_mentioned_p (operands[0],
20155 otherops[2]))
20156 {
20157 /* Swap base and index registers over to
20158 avoid a conflict. */
20159 std::swap (otherops[1], otherops[2]);
20160 }
20161 /* If both registers conflict, it will usually
20162 have been fixed by a splitter. */
20163 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20164 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20165 {
20166 if (emit)
20167 {
20168 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20169 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20170 }
20171 if (count)
20172 *count = 2;
20173 }
20174 else
20175 {
20176 otherops[0] = operands[0];
20177 if (emit)
20178 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20179 }
20180 return "";
20181 }
20182
20183 if (CONST_INT_P (otherops[2]))
20184 {
20185 if (emit)
20186 {
20187 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20188 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20189 else
20190 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20191 }
20192 }
20193 else
20194 {
20195 if (emit)
20196 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20197 }
20198 }
20199 else
20200 {
20201 if (emit)
20202 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20203 }
20204
20205 if (count)
20206 *count = 2;
20207
20208 if (can_ldrd)
20209 return "ldrd%?\t%0, [%1]";
20210
20211 return "ldmia%?\t%1, %M0";
20212 }
20213 else
20214 {
20215 otherops[1] = adjust_address (operands[1], SImode, 4);
20216 /* Take care of overlapping base/data reg. */
20217 if (reg_mentioned_p (operands[0], operands[1]))
20218 {
20219 if (emit)
20220 {
20221 output_asm_insn ("ldr%?\t%0, %1", otherops);
20222 output_asm_insn ("ldr%?\t%0, %1", operands);
20223 }
20224 if (count)
20225 *count = 2;
20226
20227 }
20228 else
20229 {
20230 if (emit)
20231 {
20232 output_asm_insn ("ldr%?\t%0, %1", operands);
20233 output_asm_insn ("ldr%?\t%0, %1", otherops);
20234 }
20235 if (count)
20236 *count = 2;
20237 }
20238 }
20239 }
20240 }
20241 else
20242 {
20243 /* Constraints should ensure this. */
20244 gcc_assert (code0 == MEM && code1 == REG);
20245 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20246 || (TARGET_ARM && TARGET_LDRD));
20247
20248 /* For TARGET_ARM the first source register of an STRD
20249 must be even. This is usually the case for double-word
20250 values, but user assembly constraints can force an odd
20251 starting register. */
20252 bool allow_strd = TARGET_LDRD
20253 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20254 switch (GET_CODE (XEXP (operands[0], 0)))
20255 {
20256 case REG:
20257 if (emit)
20258 {
20259 if (allow_strd)
20260 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20261 else
20262 output_asm_insn ("stm%?\t%m0, %M1", operands);
20263 }
20264 break;
20265
20266 case PRE_INC:
20267 gcc_assert (allow_strd);
20268 if (emit)
20269 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20270 break;
20271
20272 case PRE_DEC:
20273 if (emit)
20274 {
20275 if (allow_strd)
20276 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20277 else
20278 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20279 }
20280 break;
20281
20282 case POST_INC:
20283 if (emit)
20284 {
20285 if (allow_strd)
20286 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20287 else
20288 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20289 }
20290 break;
20291
20292 case POST_DEC:
20293 gcc_assert (allow_strd);
20294 if (emit)
20295 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20296 break;
20297
20298 case PRE_MODIFY:
20299 case POST_MODIFY:
20300 otherops[0] = operands[1];
20301 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20302 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20303
20304 /* IWMMXT allows offsets larger than strd can handle;
20305 fix these up with a pair of str. */
20306 if (!TARGET_THUMB2
20307 && CONST_INT_P (otherops[2])
20308 && (INTVAL(otherops[2]) <= -256
20309 || INTVAL(otherops[2]) >= 256))
20310 {
20311 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20312 {
20313 if (emit)
20314 {
20315 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20316 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20317 }
20318 if (count)
20319 *count = 2;
20320 }
20321 else
20322 {
20323 if (emit)
20324 {
20325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20326 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20327 }
20328 if (count)
20329 *count = 2;
20330 }
20331 }
20332 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20333 {
20334 if (emit)
20335 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20336 }
20337 else
20338 {
20339 if (emit)
20340 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20341 }
20342 break;
20343
20344 case PLUS:
20345 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20346 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20347 {
20348 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20349 {
20350 case -8:
20351 if (emit)
20352 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20353 return "";
20354
20355 case -4:
20356 if (TARGET_THUMB2)
20357 break;
20358 if (emit)
20359 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20360 return "";
20361
20362 case 4:
20363 if (TARGET_THUMB2)
20364 break;
20365 if (emit)
20366 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20367 return "";
20368 }
20369 }
20370 if (allow_strd
20371 && (REG_P (otherops[2])
20372 || TARGET_THUMB2
20373 || (CONST_INT_P (otherops[2])
20374 && INTVAL (otherops[2]) > -256
20375 && INTVAL (otherops[2]) < 256)))
20376 {
20377 otherops[0] = operands[1];
20378 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20379 if (emit)
20380 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20381 return "";
20382 }
20383 /* Fall through */
20384
20385 default:
20386 otherops[0] = adjust_address (operands[0], SImode, 4);
20387 otherops[1] = operands[1];
20388 if (emit)
20389 {
20390 output_asm_insn ("str%?\t%1, %0", operands);
20391 output_asm_insn ("str%?\t%H1, %0", otherops);
20392 }
20393 if (count)
20394 *count = 2;
20395 }
20396 }
20397
20398 return "";
20399 }
20400
20401 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20402 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20403
20404 const char *
20405 output_move_quad (rtx *operands)
20406 {
20407 if (REG_P (operands[0]))
20408 {
20409 /* Load, or reg->reg move. */
20410
20411 if (MEM_P (operands[1]))
20412 {
20413 switch (GET_CODE (XEXP (operands[1], 0)))
20414 {
20415 case REG:
20416 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20417 break;
20418
20419 case LABEL_REF:
20420 case CONST:
20421 output_asm_insn ("adr%?\t%0, %1", operands);
20422 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20423 break;
20424
20425 default:
20426 gcc_unreachable ();
20427 }
20428 }
20429 else
20430 {
20431 rtx ops[2];
20432 int dest, src, i;
20433
20434 gcc_assert (REG_P (operands[1]));
20435
20436 dest = REGNO (operands[0]);
20437 src = REGNO (operands[1]);
20438
20439 /* This seems pretty dumb, but hopefully GCC won't try to do it
20440 very often. */
20441 if (dest < src)
20442 for (i = 0; i < 4; i++)
20443 {
20444 ops[0] = gen_rtx_REG (SImode, dest + i);
20445 ops[1] = gen_rtx_REG (SImode, src + i);
20446 output_asm_insn ("mov%?\t%0, %1", ops);
20447 }
20448 else
20449 for (i = 3; i >= 0; i--)
20450 {
20451 ops[0] = gen_rtx_REG (SImode, dest + i);
20452 ops[1] = gen_rtx_REG (SImode, src + i);
20453 output_asm_insn ("mov%?\t%0, %1", ops);
20454 }
20455 }
20456 }
20457 else
20458 {
20459 gcc_assert (MEM_P (operands[0]));
20460 gcc_assert (REG_P (operands[1]));
20461 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20462
20463 switch (GET_CODE (XEXP (operands[0], 0)))
20464 {
20465 case REG:
20466 output_asm_insn ("stm%?\t%m0, %M1", operands);
20467 break;
20468
20469 default:
20470 gcc_unreachable ();
20471 }
20472 }
20473
20474 return "";
20475 }
20476
20477 /* Output a VFP load or store instruction. */
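/* For instance (illustrative operands): a DFmode load from [r0]
   prints as "vldr.64 d8, [r0]" and the matching store as
   "vstr.64 d8, [r0]"; pre-decrement and post-increment addresses use
   the vldmdb/vstmdb and vldmia/vstmia forms instead. */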
20478
20479 const char *
20480 output_move_vfp (rtx *operands)
20481 {
20482 rtx reg, mem, addr, ops[2];
20483 int load = REG_P (operands[0]);
20484 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20485 int sp = (!TARGET_VFP_FP16INST
20486 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20487 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20488 const char *templ;
20489 char buff[50];
20490 machine_mode mode;
20491
20492 reg = operands[!load];
20493 mem = operands[load];
20494
20495 mode = GET_MODE (reg);
20496
20497 gcc_assert (REG_P (reg));
20498 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20499 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20500 || mode == SFmode
20501 || mode == DFmode
20502 || mode == HImode
20503 || mode == SImode
20504 || mode == DImode
20505 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20506 gcc_assert (MEM_P (mem));
20507
20508 addr = XEXP (mem, 0);
20509
20510 switch (GET_CODE (addr))
20511 {
20512 case PRE_DEC:
20513 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20514 ops[0] = XEXP (addr, 0);
20515 ops[1] = reg;
20516 break;
20517
20518 case POST_INC:
20519 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20520 ops[0] = XEXP (addr, 0);
20521 ops[1] = reg;
20522 break;
20523
20524 default:
20525 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20526 ops[0] = reg;
20527 ops[1] = mem;
20528 break;
20529 }
20530
20531 sprintf (buff, templ,
20532 load ? "ld" : "st",
20533 dp ? "64" : sp ? "32" : "16",
20534 dp ? "P" : "",
20535 integer_p ? "\t%@ int" : "");
20536 output_asm_insn (buff, ops);
20537
20538 return "";
20539 }
20540
20541 /* Output a Neon double-word or quad-word load or store, or a load
20542 or store for larger structure modes.
20543
20544 WARNING: The ordering of elements is weird in big-endian mode,
20545 because the EABI requires that vectors stored in memory appear
20546 as though they were stored by a VSTM instruction.
20547 GCC RTL defines element ordering based on in-memory order.
20548 This can be different from the architectural ordering of elements
20549 within a NEON register. The intrinsics defined in arm_neon.h use the
20550 NEON register element ordering, not the GCC RTL element ordering.
20551
20552 For example, the in-memory ordering of a big-endian quadword
20553 vector with 16-bit elements when stored from register pair {d0,d1}
20554 will be (lowest address first, d0[N] is NEON register element N):
20555
20556 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20557
20558 When necessary, quadword registers (dN, dN+1) are moved to ARM
20559 registers from rN in the order:
20560
20561 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20562
20563 So that STM/LDM can be used on vectors in ARM registers, and the
20564 same memory layout will result as if VSTM/VLDM were used.
20565
20566 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20567 possible, which allows use of appropriate alignment tags.
20568 Note that the choice of "64" is independent of the actual vector
20569 element size; this size simply ensures that the behavior is
20570 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20571
20572 Due to limitations of those instructions, use of VST1.64/VLD1.64
20573 is not possible if:
20574 - the address contains PRE_DEC, or
20575 - the mode refers to more than 4 double-word registers
20576
20577 In those cases, it would be possible to replace VSTM/VLDM by a
20578 sequence of instructions; this is not currently implemented since
20579 this is not certain to actually improve performance. */
20580
20581 const char *
20582 output_move_neon (rtx *operands)
20583 {
20584 rtx reg, mem, addr, ops[2];
20585 int regno, nregs, load = REG_P (operands[0]);
20586 const char *templ;
20587 char buff[50];
20588 machine_mode mode;
20589
20590 reg = operands[!load];
20591 mem = operands[load];
20592
20593 mode = GET_MODE (reg);
20594
20595 gcc_assert (REG_P (reg));
20596 regno = REGNO (reg);
20597 nregs = REG_NREGS (reg) / 2;
20598 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20599 || NEON_REGNO_OK_FOR_QUAD (regno));
20600 gcc_assert (VALID_NEON_DREG_MODE (mode)
20601 || VALID_NEON_QREG_MODE (mode)
20602 || VALID_NEON_STRUCT_MODE (mode));
20603 gcc_assert (MEM_P (mem));
20604
20605 addr = XEXP (mem, 0);
20606
20607 /* Strip off const from addresses like (const (plus (...))). */
20608 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20609 addr = XEXP (addr, 0);
20610
20611 switch (GET_CODE (addr))
20612 {
20613 case POST_INC:
20614 /* We have to use vldm / vstm for too-large modes. */
20615 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20616 {
20617 templ = "v%smia%%?\t%%0!, %%h1";
20618 ops[0] = XEXP (addr, 0);
20619 }
20620 else
20621 {
20622 templ = "v%s1.64\t%%h1, %%A0";
20623 ops[0] = mem;
20624 }
20625 ops[1] = reg;
20626 break;
20627
20628 case PRE_DEC:
20629 /* We have to use vldm / vstm in this case, since there is no
20630 pre-decrement form of the vld1 / vst1 instructions. */
20631 templ = "v%smdb%%?\t%%0!, %%h1";
20632 ops[0] = XEXP (addr, 0);
20633 ops[1] = reg;
20634 break;
20635
20636 case POST_MODIFY:
20637 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20638 gcc_unreachable ();
20639
20640 case REG:
20641 /* We have to use vldm / vstm for too-large modes. */
20642 if (nregs > 1)
20643 {
20644 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20645 templ = "v%smia%%?\t%%m0, %%h1";
20646 else
20647 templ = "v%s1.64\t%%h1, %%A0";
20648
20649 ops[0] = mem;
20650 ops[1] = reg;
20651 break;
20652 }
20653 /* Fall through. */
20654 case PLUS:
20655 if (GET_CODE (addr) == PLUS)
20656 addr = XEXP (addr, 0);
20657 /* Fall through. */
20658 case LABEL_REF:
20659 {
20660 int i;
20661 int overlap = -1;
20662 for (i = 0; i < nregs; i++)
20663 {
20664 /* We're only using DImode here because it's a convenient
20665 size. */
20666 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20667 ops[1] = adjust_address (mem, DImode, 8 * i);
20668 if (reg_overlap_mentioned_p (ops[0], mem))
20669 {
20670 gcc_assert (overlap == -1);
20671 overlap = i;
20672 }
20673 else
20674 {
20675 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20676 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20677 else
20678 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20679 output_asm_insn (buff, ops);
20680 }
20681 }
20682 if (overlap != -1)
20683 {
20684 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20685 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20686 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20687 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20688 else
20689 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20690 output_asm_insn (buff, ops);
20691 }
20692
20693 return "";
20694 }
20695
20696 default:
20697 gcc_unreachable ();
20698 }
20699
20700 sprintf (buff, templ, load ? "ld" : "st");
20701 output_asm_insn (buff, ops);
20702
20703 return "";
20704 }
20705
20706 /* Compute and return the length of neon_mov<mode>, where <mode> is
20707 one of VSTRUCT modes: EI, OI, CI or XI. */
20708 int
20709 arm_attr_length_move_neon (rtx_insn *insn)
20710 {
20711 rtx reg, mem, addr;
20712 int load;
20713 machine_mode mode;
20714
20715 extract_insn_cached (insn);
20716
20717 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20718 {
20719 mode = GET_MODE (recog_data.operand[0]);
20720 switch (mode)
20721 {
20722 case E_EImode:
20723 case E_OImode:
20724 return 8;
20725 case E_CImode:
20726 return 12;
20727 case E_XImode:
20728 return 16;
20729 default:
20730 gcc_unreachable ();
20731 }
20732 }
20733
20734 load = REG_P (recog_data.operand[0]);
20735 reg = recog_data.operand[!load];
20736 mem = recog_data.operand[load];
20737
20738 gcc_assert (MEM_P (mem));
20739
20740 addr = XEXP (mem, 0);
20741
20742 /* Strip off const from addresses like (const (plus (...))). */
20743 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20744 addr = XEXP (addr, 0);
20745
20746 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20747 {
20748 int insns = REG_NREGS (reg) / 2;
20749 return insns * 4;
20750 }
20751 else
20752 return 4;
20753 }
20754
20755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20756 return zero. */
20757
20758 int
20759 arm_address_offset_is_imm (rtx_insn *insn)
20760 {
20761 rtx mem, addr;
20762
20763 extract_insn_cached (insn);
20764
20765 if (REG_P (recog_data.operand[0]))
20766 return 0;
20767
20768 mem = recog_data.operand[0];
20769
20770 gcc_assert (MEM_P (mem));
20771
20772 addr = XEXP (mem, 0);
20773
20774 if (REG_P (addr)
20775 || (GET_CODE (addr) == PLUS
20776 && REG_P (XEXP (addr, 0))
20777 && CONST_INT_P (XEXP (addr, 1))))
20778 return 1;
20779 else
20780 return 0;
20781 }
20782
20783 /* Output an ADD r, s, #n where n may be too big for one instruction.
20784 If adding zero to one register, output nothing. */
20785 const char *
20786 output_add_immediate (rtx *operands)
20787 {
20788 HOST_WIDE_INT n = INTVAL (operands[2]);
20789
20790 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20791 {
20792 if (n < 0)
20793 output_multi_immediate (operands,
20794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20795 -n);
20796 else
20797 output_multi_immediate (operands,
20798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20799 n);
20800 }
20801
20802 return "";
20803 }
20804
20805 /* Output a multiple immediate operation.
20806 OPERANDS is the vector of operands referred to in the output patterns.
20807 INSTR1 is the output pattern to use for the first constant.
20808 INSTR2 is the output pattern to use for subsequent constants.
20809 IMMED_OP is the index of the constant slot in OPERANDS.
20810 N is the constant value. */
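/* As a worked example (with made-up operands): N = 0x00ff00ff and the
   "add" patterns emit one instruction per rotatable 8-bit chunk,

       add r0, r1, #255
       add r0, r0, #16711680    @ 0x00ff0000

   since each chunk is a valid immediate on its own. */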
20811 static const char *
20812 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20813 int immed_op, HOST_WIDE_INT n)
20814 {
20815 #if HOST_BITS_PER_WIDE_INT > 32
20816 n &= 0xffffffff;
20817 #endif
20818
20819 if (n == 0)
20820 {
20821 /* Quick and easy output. */
20822 operands[immed_op] = const0_rtx;
20823 output_asm_insn (instr1, operands);
20824 }
20825 else
20826 {
20827 int i;
20828 const char * instr = instr1;
20829
20830 /* Note that n is never zero here (which would give no output). */
20831 for (i = 0; i < 32; i += 2)
20832 {
20833 if (n & (3 << i))
20834 {
20835 operands[immed_op] = GEN_INT (n & (255 << i));
20836 output_asm_insn (instr, operands);
20837 instr = instr2;
20838 i += 6;
20839 }
20840 }
20841 }
20842
20843 return "";
20844 }
20845
20846 /* Return the name of a shifter operation. */
20847 static const char *
20848 arm_shift_nmem(enum rtx_code code)
20849 {
20850 switch (code)
20851 {
20852 case ASHIFT:
20853 return ARM_LSL_NAME;
20854
20855 case ASHIFTRT:
20856 return "asr";
20857
20858 case LSHIFTRT:
20859 return "lsr";
20860
20861 case ROTATERT:
20862 return "ror";
20863
20864 default:
20865 abort();
20866 }
20867 }
20868
20869 /* Return the appropriate ARM instruction for the operation code.
20870 The returned result should not be overwritten. OP is the rtx of the
20871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20872 was shifted. */
20873 const char *
20874 arithmetic_instr (rtx op, int shift_first_arg)
20875 {
20876 switch (GET_CODE (op))
20877 {
20878 case PLUS:
20879 return "add";
20880
20881 case MINUS:
20882 return shift_first_arg ? "rsb" : "sub";
20883
20884 case IOR:
20885 return "orr";
20886
20887 case XOR:
20888 return "eor";
20889
20890 case AND:
20891 return "and";
20892
20893 case ASHIFT:
20894 case ASHIFTRT:
20895 case LSHIFTRT:
20896 case ROTATERT:
20897 return arm_shift_nmem(GET_CODE(op));
20898
20899 default:
20900 gcc_unreachable ();
20901 }
20902 }
20903
20904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20905 for the operation code. The returned result should not be overwritten.
20906 OP is the rtx code of the shift.
20907 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20908 constant shift amount otherwise. */
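/* For example (illustrative): (ashift x (const_int 3)) yields
   ARM_LSL_NAME with *AMOUNTP = 3; (mult x (const_int 8)) also yields
   ARM_LSL_NAME with *AMOUNTP = 3; and (rotate x (const_int 8)) yields
   "ror" with *AMOUNTP = 24. */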
20909 static const char *
20910 shift_op (rtx op, HOST_WIDE_INT *amountp)
20911 {
20912 const char * mnem;
20913 enum rtx_code code = GET_CODE (op);
20914
20915 switch (code)
20916 {
20917 case ROTATE:
20918 if (!CONST_INT_P (XEXP (op, 1)))
20919 {
20920 output_operand_lossage ("invalid shift operand");
20921 return NULL;
20922 }
20923
20924 code = ROTATERT;
20925 *amountp = 32 - INTVAL (XEXP (op, 1));
20926 mnem = "ror";
20927 break;
20928
20929 case ASHIFT:
20930 case ASHIFTRT:
20931 case LSHIFTRT:
20932 case ROTATERT:
20933 mnem = arm_shift_nmem(code);
20934 if (CONST_INT_P (XEXP (op, 1)))
20935 {
20936 *amountp = INTVAL (XEXP (op, 1));
20937 }
20938 else if (REG_P (XEXP (op, 1)))
20939 {
20940 *amountp = -1;
20941 return mnem;
20942 }
20943 else
20944 {
20945 output_operand_lossage ("invalid shift operand");
20946 return NULL;
20947 }
20948 break;
20949
20950 case MULT:
20951 /* We never have to worry about the amount being other than a
20952 power of 2, since this case can never be reloaded from a reg. */
20953 if (!CONST_INT_P (XEXP (op, 1)))
20954 {
20955 output_operand_lossage ("invalid shift operand");
20956 return NULL;
20957 }
20958
20959 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20960
20961 /* Amount must be a power of two. */
20962 if (*amountp & (*amountp - 1))
20963 {
20964 output_operand_lossage ("invalid shift operand");
20965 return NULL;
20966 }
20967
20968 *amountp = exact_log2 (*amountp);
20969 gcc_assert (IN_RANGE (*amountp, 0, 31));
20970 return ARM_LSL_NAME;
20971
20972 default:
20973 output_operand_lossage ("invalid shift operand");
20974 return NULL;
20975 }
20976
20977 /* This is not 100% correct, but follows from the desire to merge
20978 multiplication by a power of 2 with the recognizer for a
20979 shift. >=32 is not a valid shift for "lsl", so we must try to
20980 output a shift that produces the correct arithmetical result.
20981 Using lsr #32 is identical except for the fact that the carry bit
20982 is not set correctly if we set the flags; but we never use the
20983 carry bit from such an operation, so we can ignore that. */
20984 if (code == ROTATERT)
20985 /* Rotate is just modulo 32. */
20986 *amountp &= 31;
20987 else if (*amountp != (*amountp & 31))
20988 {
20989 if (code == ASHIFT)
20990 mnem = "lsr";
20991 *amountp = 32;
20992 }
20993
20994 /* Shifts of 0 are no-ops. */
20995 if (*amountp == 0)
20996 return NULL;
20997
20998 return mnem;
20999 }
21000
21001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21002 because /bin/as is horribly restrictive. The judgement about
21003 whether or not each character is 'printable' (and can be output as
21004 is) or not (and must be printed with an octal escape) must be made
21005 with reference to the *host* character set -- the situation is
21006 similar to that discussed in the comments above pp_c_char in
21007 c-pretty-print.cc. */
21008
21009 #define MAX_ASCII_LEN 51
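/* E.g. (hypothetical input): the bytes  h i " \n  come out as

       .ascii "hi\"\012"

   with the directive restarted whenever MAX_ASCII_LEN characters of
   output have accumulated. */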
21010
21011 void
21012 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21013 {
21014 int i;
21015 int len_so_far = 0;
21016
21017 fputs ("\t.ascii\t\"", stream);
21018
21019 for (i = 0; i < len; i++)
21020 {
21021 int c = p[i];
21022
21023 if (len_so_far >= MAX_ASCII_LEN)
21024 {
21025 fputs ("\"\n\t.ascii\t\"", stream);
21026 len_so_far = 0;
21027 }
21028
21029 if (ISPRINT (c))
21030 {
21031 if (c == '\\' || c == '\"')
21032 {
21033 putc ('\\', stream);
21034 len_so_far++;
21035 }
21036 putc (c, stream);
21037 len_so_far++;
21038 }
21039 else
21040 {
21041 fprintf (stream, "\\%03o", c);
21042 len_so_far += 4;
21043 }
21044 }
21045
21046 fputs ("\"\n", stream);
21047 }
21048 \f
21049
21050 /* Compute the register save mask for registers 0 through 12
21051 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
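/* As a rough illustration: an ordinary (non-interrupt, non-naked)
   function that uses the call-saved registers r4 and r7 and needs a
   frame pointer would get roughly
   (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM);
   the PIC, stack-alignment and EH cases below can add further bits. */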
21052
21053 static unsigned long
21054 arm_compute_save_reg0_reg12_mask (void)
21055 {
21056 unsigned long func_type = arm_current_func_type ();
21057 unsigned long save_reg_mask = 0;
21058 unsigned int reg;
21059
21060 if (IS_INTERRUPT (func_type))
21061 {
21062 unsigned int max_reg;
21063 /* Interrupt functions must not corrupt any registers,
21064 even call clobbered ones. If this is a leaf function
21065 we can just examine the registers used by the RTL, but
21066 otherwise we have to assume that whatever function is
21067 called might clobber anything, and so we have to save
21068 all the call-clobbered registers as well. */
21069 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21070 /* FIQ handlers have registers r8 - r12 banked, so
21071 we only need to check r0 - r7. Normal ISRs only
21072 bank r14 and r15, so we must check up to r12.
21073 r13 is the stack pointer, which is always preserved,
21074 so we do not need to consider it here. */
21075 max_reg = 7;
21076 else
21077 max_reg = 12;
21078
21079 for (reg = 0; reg <= max_reg; reg++)
21080 if (reg_needs_saving_p (reg))
21081 save_reg_mask |= (1 << reg);
21082
21083 /* Also save the pic base register if necessary. */
21084 if (PIC_REGISTER_MAY_NEED_SAVING
21085 && crtl->uses_pic_offset_table)
21086 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21087 }
21088 else if (IS_VOLATILE(func_type))
21089 {
21090 /* For noreturn functions we historically omitted register saves
21091 altogether. However, this really messes up debugging. As a
21092 compromise, save just the frame pointers. Combined with the link
21093 register saved elsewhere this should be sufficient to get
21094 a backtrace. */
21095 if (frame_pointer_needed)
21096 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21097 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21098 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21099 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21100 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21101 }
21102 else
21103 {
21104 /* In the normal case we only need to save those registers
21105 which are call saved and which are used by this function. */
21106 for (reg = 0; reg <= 11; reg++)
21107 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21108 save_reg_mask |= (1 << reg);
21109
21110 /* Handle the frame pointer as a special case. */
21111 if (frame_pointer_needed)
21112 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21113
21114 /* If we aren't loading the PIC register,
21115 don't stack it even though it may be live. */
21116 if (PIC_REGISTER_MAY_NEED_SAVING
21117 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21118 || crtl->uses_pic_offset_table))
21119 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21120
21121 /* The prologue will copy SP into R0, so save it. */
21122 if (IS_STACKALIGN (func_type))
21123 save_reg_mask |= 1;
21124 }
21125
21126 /* Save registers so the exception handler can modify them. */
21127 if (crtl->calls_eh_return)
21128 {
21129 unsigned int i;
21130
21131 for (i = 0; ; i++)
21132 {
21133 reg = EH_RETURN_DATA_REGNO (i);
21134 if (reg == INVALID_REGNUM)
21135 break;
21136 save_reg_mask |= 1 << reg;
21137 }
21138 }
21139
21140 return save_reg_mask;
21141 }
21142
21143 /* Return true if r3 is live at the start of the function. */
21144
21145 static bool
21146 arm_r3_live_at_start_p (void)
21147 {
21148 /* Just look at cfg info, which is still close enough to correct at this
21149 point. This gives false positives for broken functions that might use
21150 uninitialized data that happens to be allocated in r3, but who cares? */
21151 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21152 }
21153
21154 /* Compute the number of bytes used to store the static chain register on the
21155 stack, above the stack frame. We need to know this accurately to get the
21156 alignment of the rest of the stack frame correct. */
21157
21158 static int
21159 arm_compute_static_chain_stack_bytes (void)
21160 {
21161 /* Once the value is updated from the init value of -1, do not
21162 re-compute. */
21163 if (cfun->machine->static_chain_stack_bytes != -1)
21164 return cfun->machine->static_chain_stack_bytes;
21165
21166 /* See the defining assertion in arm_expand_prologue. */
21167 if (IS_NESTED (arm_current_func_type ())
21168 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21169 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21170 || flag_stack_clash_protection)
21171 && !df_regs_ever_live_p (LR_REGNUM)))
21172 && arm_r3_live_at_start_p ()
21173 && crtl->args.pretend_args_size == 0)
21174 return 4;
21175
21176 return 0;
21177 }
21178
21179 /* Compute a bit mask of which core registers need to be
21180 saved on the stack for the current function.
21181 This is used by arm_compute_frame_layout, which may add extra registers. */
21182
21183 static unsigned long
21184 arm_compute_save_core_reg_mask (void)
21185 {
21186 unsigned int save_reg_mask = 0;
21187 unsigned long func_type = arm_current_func_type ();
21188 unsigned int reg;
21189
21190 if (IS_NAKED (func_type))
21191 /* This should never really happen. */
21192 return 0;
21193
21194 /* If we are creating a stack frame, then we must save the frame pointer,
21195 IP (which will hold the old stack pointer), LR and the PC. */
21196 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21197 save_reg_mask |=
21198 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21199 | (1 << IP_REGNUM)
21200 | (1 << LR_REGNUM)
21201 | (1 << PC_REGNUM);
21202
21203 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21204
21205 /* Decide if we need to save the link register.
21206 Interrupt routines have their own banked link register,
21207 so they never need to save it.
21208 Otherwise if we do not use the link register we do not need to save
21209 it. If we are pushing other registers onto the stack however, we
21210 can save an instruction in the epilogue by pushing the link register
21211 now and then popping it back into the PC. This incurs extra memory
21212 accesses though, so we only do it when optimizing for size, and only
21213 if we know that we will not need a fancy return sequence. */
21214 if (df_regs_ever_live_p (LR_REGNUM)
21215 || (save_reg_mask
21216 && optimize_size
21217 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21218 && !crtl->tail_call_emit
21219 && !crtl->calls_eh_return))
21220 save_reg_mask |= 1 << LR_REGNUM;
21221
21222 if (cfun->machine->lr_save_eliminated)
21223 save_reg_mask &= ~ (1 << LR_REGNUM);
21224
21225 if (TARGET_REALLY_IWMMXT
21226 && ((bit_count (save_reg_mask)
21227 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21228 arm_compute_static_chain_stack_bytes())
21229 ) % 2) != 0)
21230 {
21231 /* The total number of registers that are going to be pushed
21232 onto the stack is odd. We need to ensure that the stack
21233 is 64-bit aligned before we start to save iWMMXt registers,
21234 and also before we start to create locals. (A local variable
21235 might be a double or long long which we will load/store using
21236 an iWMMXt instruction). Therefore we need to push another
21237 ARM register, so that the stack will be 64-bit aligned. We
21238 try to avoid using the arg registers (r0 - r3) as they might be
21239 used to pass values in a tail call. */
21240 for (reg = 4; reg <= 12; reg++)
21241 if ((save_reg_mask & (1 << reg)) == 0)
21242 break;
21243
21244 if (reg <= 12)
21245 save_reg_mask |= (1 << reg);
21246 else
21247 {
21248 cfun->machine->sibcall_blocked = 1;
21249 save_reg_mask |= (1 << 3);
21250 }
21251 }
21252
21253 /* We may need to push an additional register for use initializing the
21254 PIC base register. */
21255 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21256 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21257 {
21258 reg = thumb_find_work_register (1 << 4);
21259 if (!call_used_or_fixed_reg_p (reg))
21260 save_reg_mask |= (1 << reg);
21261 }
21262
21263 return save_reg_mask;
21264 }
21265
21266 /* Compute a bit mask of which core registers need to be
21267 saved on the stack for the current function. */
21268 static unsigned long
21269 thumb1_compute_save_core_reg_mask (void)
21270 {
21271 unsigned long mask;
21272 unsigned reg;
21273
21274 mask = 0;
21275 for (reg = 0; reg < 12; reg ++)
21276 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21277 mask |= 1 << reg;
21278
21279 /* Handle the frame pointer as a special case. */
21280 if (frame_pointer_needed)
21281 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21282
21283 if (flag_pic
21284 && !TARGET_SINGLE_PIC_BASE
21285 && arm_pic_register != INVALID_REGNUM
21286 && crtl->uses_pic_offset_table)
21287 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21288
21289 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21290 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21291 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21292
21293 /* LR will also be pushed if any lo regs are pushed. */
21294 if (mask & 0xff || thumb_force_lr_save ())
21295 mask |= (1 << LR_REGNUM);
21296
21297 bool call_clobbered_scratch
21298 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21299 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21300
21301 /* Make sure we have a low work register if we need one. We will
21302 need one if we are going to push a high register, but we are not
21303 currently intending to push a low register. However, if both the
21304 prologue and epilogue have a spare call-clobbered low register,
21305 then we won't need to find an additional work register. It does
21306 not need to be the same register in the prologue and
21307 epilogue. */
21308 if ((mask & 0xff) == 0
21309 && !call_clobbered_scratch
21310 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21311 {
21312 /* Use thumb_find_work_register to choose which register
21313 we will use. If the register is live then we will
21314 have to push it. Use LAST_LO_REGNUM as our fallback
21315 choice for the register to select. */
21316 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21317 /* Make sure the register returned by thumb_find_work_register is
21318 not part of the return value. */
21319 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21320 reg = LAST_LO_REGNUM;
21321
21322 if (callee_saved_reg_p (reg))
21323 mask |= 1 << reg;
21324 }
21325
21326 /* The 504 below is 8 bytes less than 512 because there are two possible
21327 alignment words. We can't tell here if they will be present or not, so we
21328 have to play it safe and assume that they are. */
21329 if ((CALLER_INTERWORKING_SLOT_SIZE +
21330 ROUND_UP_WORD (get_frame_size ()) +
21331 crtl->outgoing_args_size) >= 504)
21332 {
21333 /* This is the same as the code in thumb1_expand_prologue() which
21334 determines which register to use for stack decrement. */
21335 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21336 if (mask & (1 << reg))
21337 break;
21338
21339 if (reg > LAST_LO_REGNUM)
21340 {
21341 /* Make sure we have a register available for stack decrement. */
21342 mask |= 1 << LAST_LO_REGNUM;
21343 }
21344 }
21345
21346 return mask;
21347 }
21348
21349 /* Return the number of bytes required to save VFP registers. */
21350 static int
21351 arm_get_vfp_saved_size (void)
21352 {
21353 unsigned int regno;
21354 int count;
21355 int saved;
21356
21357 saved = 0;
21358 /* Space for saved VFP registers. */
21359 if (TARGET_VFP_BASE)
21360 {
21361 count = 0;
21362 for (regno = FIRST_VFP_REGNUM;
21363 regno < LAST_VFP_REGNUM;
21364 regno += 2)
21365 {
21366 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21367 {
21368 if (count > 0)
21369 {
21370 /* Workaround ARM10 VFPr1 bug. */
21371 if (count == 2 && !arm_arch6)
21372 count++;
21373 saved += count * 8;
21374 }
21375 count = 0;
21376 }
21377 else
21378 count++;
21379 }
21380 if (count > 0)
21381 {
21382 if (count == 2 && !arm_arch6)
21383 count++;
21384 saved += count * 8;
21385 }
21386 }
21387 return saved;
21388 }
21389
21390
21391 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21392 everything bar the final return instruction. If simple_return is true,
21393 then do not output epilogue, because it has already been emitted in RTL.
21394
21395 Note: do not forget to update the length attribute of the corresponding insn
21396 pattern when changing the assembly output (e.g. the length attribute of
21397 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21398 register clearing sequences). */
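/* A few representative outputs (illustrative only, ignoring any
   condition code and assuming no interworking): a frame that saved
   {r4, r5, lr} normally returns with "pop {r4, r5, pc}", a frame that
   saved only LR returns with "ldr pc, [sp], #4", and an
   ARM_FT_INTERWORKED function finishes with "bx lr".  */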
21399 const char *
21400 output_return_instruction (rtx operand, bool really_return, bool reverse,
21401 bool simple_return)
21402 {
21403 char conditional[10];
21404 char instr[100];
21405 unsigned reg;
21406 unsigned long live_regs_mask;
21407 unsigned long func_type;
21408 arm_stack_offsets *offsets;
21409
21410 func_type = arm_current_func_type ();
21411
21412 if (IS_NAKED (func_type))
21413 return "";
21414
21415 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21416 {
21417 /* If this function was declared non-returning, and we have
21418 found a tail call, then we have to trust that the called
21419 function won't return. */
21420 if (really_return)
21421 {
21422 rtx ops[2];
21423
21424 /* Otherwise, trap an attempted return by aborting. */
21425 ops[0] = operand;
21426 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21427 : "abort");
21428 assemble_external_libcall (ops[1]);
21429 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21430 }
21431
21432 return "";
21433 }
21434
21435 gcc_assert (!cfun->calls_alloca || really_return);
21436
21437 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21438
21439 cfun->machine->return_used_this_function = 1;
21440
21441 offsets = arm_get_frame_offsets ();
21442 live_regs_mask = offsets->saved_regs_mask;
21443
21444 if (!simple_return && live_regs_mask)
21445 {
21446 const char * return_reg;
21447
21448 /* If we do not have any special requirements for function exit
21449 (e.g. interworking) then we can load the return address
21450 directly into the PC. Otherwise we must load it into LR. */
21451 if (really_return
21452 && !IS_CMSE_ENTRY (func_type)
21453 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21454 return_reg = reg_names[PC_REGNUM];
21455 else
21456 return_reg = reg_names[LR_REGNUM];
21457
21458 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21459 {
21460 /* There are three possible reasons for the IP register
21461 being saved: 1) a stack frame was created, in which case
21462 IP contains the old stack pointer; 2) an ISR routine
21463 corrupted it; or 3) it was saved to align the stack on
21464 iWMMXt. In case 1, restore IP into SP, otherwise just
21465 restore IP. */
21466 if (frame_pointer_needed)
21467 {
21468 live_regs_mask &= ~ (1 << IP_REGNUM);
21469 live_regs_mask |= (1 << SP_REGNUM);
21470 }
21471 else
21472 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21473 }
21474
21475 /* On some ARM architectures it is faster to use LDR rather than
21476 LDM to load a single register. On other architectures, the
21477 cost is the same. In 26 bit mode, or for exception handlers,
21478 we have to use LDM to load the PC so that the CPSR is also
21479 restored. */
21480 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21481 if (live_regs_mask == (1U << reg))
21482 break;
21483
21484 if (reg <= LAST_ARM_REGNUM
21485 && (reg != LR_REGNUM
21486 || ! really_return
21487 || ! IS_INTERRUPT (func_type)))
21488 {
21489 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21490 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21491 }
21492 else
21493 {
21494 char *p;
21495 int first = 1;
21496
21497 /* Generate the load multiple instruction to restore the
21498 registers. Note we can get here, even if
21499 frame_pointer_needed is true, but only if sp already
21500 points to the base of the saved core registers. */
21501 if (live_regs_mask & (1 << SP_REGNUM))
21502 {
21503 unsigned HOST_WIDE_INT stack_adjust;
21504
21505 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21506 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21507
21508 if (stack_adjust && arm_arch5t && TARGET_ARM)
21509 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21510 else
21511 {
21512 /* If we can't use ldmib (SA110 bug),
21513 then try to pop r3 instead. */
21514 if (stack_adjust)
21515 live_regs_mask |= 1 << 3;
21516
21517 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21518 }
21519 }
21520 /* For interrupt returns we have to use an LDM rather than
21521 a POP so that we can use the exception return variant. */
21522 else if (IS_INTERRUPT (func_type))
21523 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21524 else
21525 sprintf (instr, "pop%s\t{", conditional);
21526
21527 p = instr + strlen (instr);
21528
21529 for (reg = 0; reg <= SP_REGNUM; reg++)
21530 if (live_regs_mask & (1 << reg))
21531 {
21532 int l = strlen (reg_names[reg]);
21533
21534 if (first)
21535 first = 0;
21536 else
21537 {
21538 memcpy (p, ", ", 2);
21539 p += 2;
21540 }
21541
21542 memcpy (p, "%|", 2);
21543 memcpy (p + 2, reg_names[reg], l);
21544 p += l + 2;
21545 }
21546
21547 if (live_regs_mask & (1 << LR_REGNUM))
21548 {
21549 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21550 /* If returning from an interrupt, restore the CPSR. */
21551 if (IS_INTERRUPT (func_type))
21552 strcat (p, "^");
21553 }
21554 else
21555 strcpy (p, "}");
21556 }
21557
21558 output_asm_insn (instr, & operand);
21559
21560 /* See if we need to generate an extra instruction to
21561 perform the actual function return. */
21562 if (really_return
21563 && func_type != ARM_FT_INTERWORKED
21564 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21565 {
21566 /* The return has already been handled
21567 by loading the LR into the PC. */
21568 return "";
21569 }
21570 }
21571
21572 if (really_return)
21573 {
21574 switch ((int) ARM_FUNC_TYPE (func_type))
21575 {
21576 case ARM_FT_ISR:
21577 case ARM_FT_FIQ:
21578 /* ??? This is wrong for unified assembly syntax. */
21579 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21580 break;
21581
21582 case ARM_FT_INTERWORKED:
21583 gcc_assert (arm_arch5t || arm_arch4t);
21584 sprintf (instr, "bx%s\t%%|lr", conditional);
21585 break;
21586
21587 case ARM_FT_EXCEPTION:
21588 /* ??? This is wrong for unified assembly syntax. */
21589 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21590 break;
21591
21592 default:
21593 if (IS_CMSE_ENTRY (func_type))
21594 {
21595 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21596 emitted by cmse_nonsecure_entry_clear_before_return () and the
21597 VSTR/VLDR instructions in the prologue and epilogue. */
21598 if (!TARGET_HAVE_FPCXT_CMSE)
21599 {
21600 /* Check if we have to clear the 'GE bits', which are only used if
21601 parallel add and subtract instructions are available. */
21602 if (TARGET_INT_SIMD)
21603 snprintf (instr, sizeof (instr),
21604 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21605 else
21606 snprintf (instr, sizeof (instr),
21607 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21608
21609 output_asm_insn (instr, & operand);
21610 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21611 care of it. */
21612 if (TARGET_HARD_FLOAT)
21613 {
21614 /* Clear the cumulative exception-status bits (0-4,7) and
21615 the condition code bits (28-31) of the FPSCR. We need
21616 to remember to clear the first scratch register used
21617 (IP) and save and restore the second (r4).
21618
21619 Important note: the length of the
21620 thumb2_cmse_entry_return insn pattern must account for
21621 the size of the below instructions. */
21622 output_asm_insn ("push\t{%|r4}", & operand);
21623 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21624 output_asm_insn ("movw\t%|r4, #65376", & operand);
21625 output_asm_insn ("movt\t%|r4, #4095", & operand);
21626 output_asm_insn ("and\t%|ip, %|r4", & operand);
21627 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21628 output_asm_insn ("pop\t{%|r4}", & operand);
21629 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21630 }
21631 }
21632 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21633 }
21634 /* Use bx if it's available. */
21635 else if (arm_arch5t || arm_arch4t)
21636 sprintf (instr, "bx%s\t%%|lr", conditional);
21637 else
21638 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21639 break;
21640 }
21641
21642 output_asm_insn (instr, & operand);
21643 }
21644
21645 return "";
21646 }
21647
21648 /* Output in FILE asm statements needed to declare the NAME of the function
21649 defined by its DECL node. */
21650
21651 void
21652 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21653 {
21654 size_t cmse_name_len;
21655 char *cmse_name = 0;
21656 char cmse_prefix[] = "__acle_se_";
21657
21658 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21659 extra function label for each function with the 'cmse_nonsecure_entry'
21660 attribute. This extra function label should be prepended with
21661 '__acle_se_', telling the linker that it needs to create secure gateway
21662 veneers for this function. */
21663 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21664 DECL_ATTRIBUTES (decl)))
21665 {
21666 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21667 cmse_name = XALLOCAVEC (char, cmse_name_len);
21668 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21669 targetm.asm_out.globalize_label (file, cmse_name);
21670
21671 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21672 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21673 }
21674
21675 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21676 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21677 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21678 ASM_OUTPUT_LABEL (file, name);
21679
21680 if (cmse_name)
21681 ASM_OUTPUT_LABEL (file, cmse_name);
21682
21683 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21684 }
21685
21686 /* Write the function name into the code section, directly preceding
21687 the function prologue.
21688
21689 Code will be output similar to this:
21690 t0
21691 .ascii "arm_poke_function_name", 0
21692 .align
21693 t1
21694 .word 0xff000000 + (t1 - t0)
21695 arm_poke_function_name
21696 mov ip, sp
21697 stmfd sp!, {fp, ip, lr, pc}
21698 sub fp, ip, #4
21699
21700 When performing a stack backtrace, code can inspect the value
21701 of 'pc' stored at 'fp' + 0. If the trace function then looks
21702 at location pc - 12 and the top 8 bits are set, then we know
21703 that there is a function name embedded immediately preceding this
21704 location, and its length is ((pc[-3]) & ~0xff000000).
21705
21706 We assume that pc is declared as a pointer to an unsigned long.
21707
21708 It is of no benefit to output the function name if we are assembling
21709 a leaf function. These function types will not contain a stack
21710 backtrace structure, therefore it is not possible to determine the
21711 function name. */
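/* Purely as an illustration of the layout above (the names used are
   hypothetical), a backtracer could recover the name roughly as
   follows, where pc is the unsigned long pointer described above:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) &pc[-3] - len;
         ... use name ...
       }  */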
21712 void
21713 arm_poke_function_name (FILE *stream, const char *name)
21714 {
21715 unsigned long alignlength;
21716 unsigned long length;
21717 rtx x;
21718
21719 length = strlen (name) + 1;
21720 alignlength = ROUND_UP_WORD (length);
21721
21722 ASM_OUTPUT_ASCII (stream, name, length);
21723 ASM_OUTPUT_ALIGN (stream, 2);
21724 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21725 assemble_aligned_integer (UNITS_PER_WORD, x);
21726 }
21727
21728 /* Place some comments into the assembler stream
21729 describing the current function. */
21730 static void
21731 arm_output_function_prologue (FILE *f)
21732 {
21733 unsigned long func_type;
21734
21735 /* Sanity check. */
21736 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21737
21738 func_type = arm_current_func_type ();
21739
21740 switch ((int) ARM_FUNC_TYPE (func_type))
21741 {
21742 default:
21743 case ARM_FT_NORMAL:
21744 break;
21745 case ARM_FT_INTERWORKED:
21746 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21747 break;
21748 case ARM_FT_ISR:
21749 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21750 break;
21751 case ARM_FT_FIQ:
21752 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21753 break;
21754 case ARM_FT_EXCEPTION:
21755 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21756 break;
21757 }
21758
21759 if (IS_NAKED (func_type))
21760 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21761
21762 if (IS_VOLATILE (func_type))
21763 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21764
21765 if (IS_NESTED (func_type))
21766 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21767 if (IS_STACKALIGN (func_type))
21768 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21769 if (IS_CMSE_ENTRY (func_type))
21770 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21771
21772 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21773 (HOST_WIDE_INT) crtl->args.size,
21774 crtl->args.pretend_args_size,
21775 (HOST_WIDE_INT) get_frame_size ());
21776
21777 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21778 frame_pointer_needed,
21779 cfun->machine->uses_anonymous_args);
21780
21781 if (cfun->machine->lr_save_eliminated)
21782 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21783
21784 if (crtl->calls_eh_return)
21785 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21786
21787 }
21788
21789 static void
21790 arm_output_function_epilogue (FILE *)
21791 {
21792 arm_stack_offsets *offsets;
21793
21794 if (TARGET_THUMB1)
21795 {
21796 int regno;
21797
21798 /* Emit any call-via-reg trampolines that are needed for v4t support
21799 of call_reg and call_value_reg type insns. */
21800 for (regno = 0; regno < LR_REGNUM; regno++)
21801 {
21802 rtx label = cfun->machine->call_via[regno];
21803
21804 if (label != NULL)
21805 {
21806 switch_to_section (function_section (current_function_decl));
21807 targetm.asm_out.internal_label (asm_out_file, "L",
21808 CODE_LABEL_NUMBER (label));
21809 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21810 }
21811 }
21812
21813 /* ??? Probably not safe to set this here, since it assumes that a
21814 function will be emitted as assembly immediately after we generate
21815 RTL for it. This does not happen for inline functions. */
21816 cfun->machine->return_used_this_function = 0;
21817 }
21818 else /* TARGET_32BIT */
21819 {
21820 /* We need to take into account any stack-frame rounding. */
21821 offsets = arm_get_frame_offsets ();
21822
21823 gcc_assert (!use_return_insn (FALSE, NULL)
21824 || (cfun->machine->return_used_this_function != 0)
21825 || offsets->saved_regs == offsets->outgoing_args
21826 || frame_pointer_needed);
21827 }
21828 }
21829
21830 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21831 STR and STRD. If an even number of registers are being pushed, one
21832 or more STRD patterns are created for each register pair. If an
21833 odd number of registers are pushed, emit an initial STR followed by
21834 as many STRD instructions as are needed. This works best when the
21835 stack is initially 64-bit aligned (the normal case), since it
21836 ensures that each STRD is also 64-bit aligned. */
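/* For instance (illustrative only, assuming SP is 64-bit aligned on
   entry), pushing {r4, r5, r6, r7} comes out as

     strd    r4, r5, [sp, #-16]!
     strd    r6, r7, [sp, #8]

   while pushing {r4, r5, r6} starts with a single writeback store:

     str     r4, [sp, #-12]!
     strd    r5, r6, [sp, #4]  */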
21837 static void
21838 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21839 {
21840 int num_regs = 0;
21841 int i;
21842 int regno;
21843 rtx par = NULL_RTX;
21844 rtx dwarf = NULL_RTX;
21845 rtx tmp;
21846 bool first = true;
21847
21848 num_regs = bit_count (saved_regs_mask);
21849
21850 /* Must be at least one register to save, and can't save SP or PC. */
21851 gcc_assert (num_regs > 0 && num_regs <= 14);
21852 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21853 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21854
21855 /* Create sequence for DWARF info. All the frame-related data for
21856 debugging is held in this wrapper. */
21857 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21858
21859 /* Describe the stack adjustment. */
21860 tmp = gen_rtx_SET (stack_pointer_rtx,
21861 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21862 RTX_FRAME_RELATED_P (tmp) = 1;
21863 XVECEXP (dwarf, 0, 0) = tmp;
21864
21865 /* Find the first register. */
21866 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21867 ;
21868
21869 i = 0;
21870
21871 /* If there's an odd number of registers to push, start off by
21872 pushing a single register. This ensures that subsequent strd
21873 operations are dword aligned (assuming that SP was originally
21874 64-bit aligned). */
21875 if ((num_regs & 1) != 0)
21876 {
21877 rtx reg, mem, insn;
21878
21879 reg = gen_rtx_REG (SImode, regno);
21880 if (num_regs == 1)
21881 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21882 stack_pointer_rtx));
21883 else
21884 mem = gen_frame_mem (Pmode,
21885 gen_rtx_PRE_MODIFY
21886 (Pmode, stack_pointer_rtx,
21887 plus_constant (Pmode, stack_pointer_rtx,
21888 -4 * num_regs)));
21889
21890 tmp = gen_rtx_SET (mem, reg);
21891 RTX_FRAME_RELATED_P (tmp) = 1;
21892 insn = emit_insn (tmp);
21893 RTX_FRAME_RELATED_P (insn) = 1;
21894 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21895 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21896 RTX_FRAME_RELATED_P (tmp) = 1;
21897 i++;
21898 regno++;
21899 XVECEXP (dwarf, 0, i) = tmp;
21900 first = false;
21901 }
21902
21903 while (i < num_regs)
21904 if (saved_regs_mask & (1 << regno))
21905 {
21906 rtx reg1, reg2, mem1, mem2;
21907 rtx tmp0, tmp1, tmp2;
21908 int regno2;
21909
21910 /* Find the register to pair with this one. */
21911 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21912 regno2++)
21913 ;
21914
21915 reg1 = gen_rtx_REG (SImode, regno);
21916 reg2 = gen_rtx_REG (SImode, regno2);
21917
21918 if (first)
21919 {
21920 rtx insn;
21921
21922 first = false;
21923 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21924 stack_pointer_rtx,
21925 -4 * num_regs));
21926 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21927 stack_pointer_rtx,
21928 -4 * (num_regs - 1)));
21929 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21930 plus_constant (Pmode, stack_pointer_rtx,
21931 -4 * (num_regs)));
21932 tmp1 = gen_rtx_SET (mem1, reg1);
21933 tmp2 = gen_rtx_SET (mem2, reg2);
21934 RTX_FRAME_RELATED_P (tmp0) = 1;
21935 RTX_FRAME_RELATED_P (tmp1) = 1;
21936 RTX_FRAME_RELATED_P (tmp2) = 1;
21937 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21938 XVECEXP (par, 0, 0) = tmp0;
21939 XVECEXP (par, 0, 1) = tmp1;
21940 XVECEXP (par, 0, 2) = tmp2;
21941 insn = emit_insn (par);
21942 RTX_FRAME_RELATED_P (insn) = 1;
21943 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21944 }
21945 else
21946 {
21947 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21948 stack_pointer_rtx,
21949 4 * i));
21950 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21951 stack_pointer_rtx,
21952 4 * (i + 1)));
21953 tmp1 = gen_rtx_SET (mem1, reg1);
21954 tmp2 = gen_rtx_SET (mem2, reg2);
21955 RTX_FRAME_RELATED_P (tmp1) = 1;
21956 RTX_FRAME_RELATED_P (tmp2) = 1;
21957 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21958 XVECEXP (par, 0, 0) = tmp1;
21959 XVECEXP (par, 0, 1) = tmp2;
21960 emit_insn (par);
21961 }
21962
21963 /* Create unwind information. This is an approximation. */
21964 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21965 plus_constant (Pmode,
21966 stack_pointer_rtx,
21967 4 * i)),
21968 reg1);
21969 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21970 plus_constant (Pmode,
21971 stack_pointer_rtx,
21972 4 * (i + 1))),
21973 reg2);
21974
21975 RTX_FRAME_RELATED_P (tmp1) = 1;
21976 RTX_FRAME_RELATED_P (tmp2) = 1;
21977 XVECEXP (dwarf, 0, i + 1) = tmp1;
21978 XVECEXP (dwarf, 0, i + 2) = tmp2;
21979 i += 2;
21980 regno = regno2 + 1;
21981 }
21982 else
21983 regno++;
21984
21985 return;
21986 }
21987
21988 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21989 whenever possible, otherwise it emits single-word stores. The first store
21990 also allocates stack space for all saved registers, using writeback with
21991 post-addressing mode. All other stores use offset addressing. If no STRD
21992 can be emitted, this function emits a sequence of single-word stores,
21993 and not an STM as before, because single-word stores provide more
21994 scheduling freedom and can be turned into an STM by peephole optimizations. */
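/* As an illustrative sketch, a mask of {r4, r6, r7} would be emitted as

     str     r4, [sp, #-12]!
     strd    r6, r7, [sp, #4]

   since r5 is not being saved, r4 cannot form the consecutive,
   even-starting pair that ARM STRD requires and falls back to a
   single-word store.  */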
21995 static void
21996 arm_emit_strd_push (unsigned long saved_regs_mask)
21997 {
21998 int num_regs = 0;
21999 int i, j, dwarf_index = 0;
22000 int offset = 0;
22001 rtx dwarf = NULL_RTX;
22002 rtx insn = NULL_RTX;
22003 rtx tmp, mem;
22004
22005 /* TODO: More efficient code can be emitted by changing the
22006 layout, e.g., first push all pairs that can use STRD to keep the
22007 stack aligned, and then push all other registers. */
22008 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22009 if (saved_regs_mask & (1 << i))
22010 num_regs++;
22011
22012 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22013 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22014 gcc_assert (num_regs > 0);
22015
22016 /* Create sequence for DWARF info. */
22017 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22018
22019 /* For dwarf info, we generate explicit stack update. */
22020 tmp = gen_rtx_SET (stack_pointer_rtx,
22021 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22022 RTX_FRAME_RELATED_P (tmp) = 1;
22023 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22024
22025 /* Save registers. */
22026 offset = - 4 * num_regs;
22027 j = 0;
22028 while (j <= LAST_ARM_REGNUM)
22029 if (saved_regs_mask & (1 << j))
22030 {
22031 if ((j % 2 == 0)
22032 && (saved_regs_mask & (1 << (j + 1))))
22033 {
22034 /* Current register and previous register form register pair for
22035 which STRD can be generated. */
22036 if (offset < 0)
22037 {
22038 /* Allocate stack space for all saved registers. */
22039 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22040 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22041 mem = gen_frame_mem (DImode, tmp);
22042 offset = 0;
22043 }
22044 else if (offset > 0)
22045 mem = gen_frame_mem (DImode,
22046 plus_constant (Pmode,
22047 stack_pointer_rtx,
22048 offset));
22049 else
22050 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22051
22052 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22053 RTX_FRAME_RELATED_P (tmp) = 1;
22054 tmp = emit_insn (tmp);
22055
22056 /* Record the first store insn. */
22057 if (dwarf_index == 1)
22058 insn = tmp;
22059
22060 /* Generate dwarf info. */
22061 mem = gen_frame_mem (SImode,
22062 plus_constant (Pmode,
22063 stack_pointer_rtx,
22064 offset));
22065 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22066 RTX_FRAME_RELATED_P (tmp) = 1;
22067 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22068
22069 mem = gen_frame_mem (SImode,
22070 plus_constant (Pmode,
22071 stack_pointer_rtx,
22072 offset + 4));
22073 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22074 RTX_FRAME_RELATED_P (tmp) = 1;
22075 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22076
22077 offset += 8;
22078 j += 2;
22079 }
22080 else
22081 {
22082 /* Emit a single word store. */
22083 if (offset < 0)
22084 {
22085 /* Allocate stack space for all saved registers. */
22086 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22087 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22088 mem = gen_frame_mem (SImode, tmp);
22089 offset = 0;
22090 }
22091 else if (offset > 0)
22092 mem = gen_frame_mem (SImode,
22093 plus_constant (Pmode,
22094 stack_pointer_rtx,
22095 offset));
22096 else
22097 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22098
22099 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22100 RTX_FRAME_RELATED_P (tmp) = 1;
22101 tmp = emit_insn (tmp);
22102
22103 /* Record the first store insn. */
22104 if (dwarf_index == 1)
22105 insn = tmp;
22106
22107 /* Generate dwarf info. */
22108 mem = gen_frame_mem (SImode,
22109 plus_constant(Pmode,
22110 stack_pointer_rtx,
22111 offset));
22112 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22113 RTX_FRAME_RELATED_P (tmp) = 1;
22114 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22115
22116 offset += 4;
22117 j += 1;
22118 }
22119 }
22120 else
22121 j++;
22122
22123 /* Attach dwarf info to the first insn we generate. */
22124 gcc_assert (insn != NULL_RTX);
22125 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22126 RTX_FRAME_RELATED_P (insn) = 1;
22127 }
22128
22129 /* Generate and emit an insn that we will recognize as a push_multi.
22130 Unfortunately, since this insn does not reflect very well the actual
22131 semantics of the operation, we need to annotate the insn for the benefit
22132 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22133 MASK for registers that should be annotated for DWARF2 frame unwind
22134 information. */
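/* For example, a MASK covering {r4, r5, lr} yields an insn that the
   push_multi pattern typically prints as "push {r4, r5, lr}", while
   the attached frame note spells out the 12-byte SP decrement and the
   three individual stores (illustrative; the exact mnemonic depends on
   the target state).  */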
22135 static rtx
22136 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22137 {
22138 int num_regs = 0;
22139 int num_dwarf_regs = 0;
22140 int i, j;
22141 rtx par;
22142 rtx dwarf;
22143 int dwarf_par_index;
22144 rtx tmp, reg;
22145
22146 /* We don't record the PC in the dwarf frame information. */
22147 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22148
22149 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22150 {
22151 if (mask & (1 << i))
22152 num_regs++;
22153 if (dwarf_regs_mask & (1 << i))
22154 num_dwarf_regs++;
22155 }
22156
22157 gcc_assert (num_regs && num_regs <= 16);
22158 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22159
22160 /* For the body of the insn we are going to generate an UNSPEC in
22161 parallel with several USEs. This allows the insn to be recognized
22162 by the push_multi pattern in the arm.md file.
22163
22164 The body of the insn looks something like this:
22165
22166 (parallel [
22167 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22168 (const_int:SI <num>)))
22169 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22170 (use (reg:SI XX))
22171 (use (reg:SI YY))
22172 ...
22173 ])
22174
22175 For the frame note however, we try to be more explicit and actually
22176 show each register being stored into the stack frame, plus a (single)
22177 decrement of the stack pointer. We do it this way in order to be
22178 friendly to the stack unwinding code, which only wants to see a single
22179 stack decrement per instruction. The RTL we generate for the note looks
22180 something like this:
22181
22182 (sequence [
22183 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22184 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22185 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22186 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22187 ...
22188 ])
22189
22190 FIXME: In an ideal world the PRE_MODIFY would not exist and
22191 instead we'd have a parallel expression detailing all
22192 the stores to the various memory addresses so that debug
22193 information is more up-to-date. Remember however while writing
22194 this to take care of the constraints with the push instruction.
22195
22196 Note also that this has to be taken care of for the VFP registers.
22197
22198 For more see PR43399. */
22199
22200 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22201 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22202 dwarf_par_index = 1;
22203
22204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22205 {
22206 if (mask & (1 << i))
22207 {
22208 reg = gen_rtx_REG (SImode, i);
22209
22210 XVECEXP (par, 0, 0)
22211 = gen_rtx_SET (gen_frame_mem
22212 (BLKmode,
22213 gen_rtx_PRE_MODIFY (Pmode,
22214 stack_pointer_rtx,
22215 plus_constant
22216 (Pmode, stack_pointer_rtx,
22217 -4 * num_regs))
22218 ),
22219 gen_rtx_UNSPEC (BLKmode,
22220 gen_rtvec (1, reg),
22221 UNSPEC_PUSH_MULT));
22222
22223 if (dwarf_regs_mask & (1 << i))
22224 {
22225 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22226 reg);
22227 RTX_FRAME_RELATED_P (tmp) = 1;
22228 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22229 }
22230
22231 break;
22232 }
22233 }
22234
22235 for (j = 1, i++; j < num_regs; i++)
22236 {
22237 if (mask & (1 << i))
22238 {
22239 reg = gen_rtx_REG (SImode, i);
22240
22241 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22242
22243 if (dwarf_regs_mask & (1 << i))
22244 {
22245 tmp
22246 = gen_rtx_SET (gen_frame_mem
22247 (SImode,
22248 plus_constant (Pmode, stack_pointer_rtx,
22249 4 * j)),
22250 reg);
22251 RTX_FRAME_RELATED_P (tmp) = 1;
22252 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22253 }
22254
22255 j++;
22256 }
22257 }
22258
22259 par = emit_insn (par);
22260
22261 tmp = gen_rtx_SET (stack_pointer_rtx,
22262 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22263 RTX_FRAME_RELATED_P (tmp) = 1;
22264 XVECEXP (dwarf, 0, 0) = tmp;
22265
22266 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22267
22268 return par;
22269 }
22270
22271 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22272 SIZE is the offset to be adjusted.
22273 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
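/* Callers in the epilogue code typically pass SIZE equal to
   UNITS_PER_WORD times the number of words just popped, with DEST and
   SRC both stack_pointer_rtx, so that the unwinder's CFA tracks the
   adjusted stack pointer.  */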
22274 static void
22275 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22276 {
22277 rtx dwarf;
22278
22279 RTX_FRAME_RELATED_P (insn) = 1;
22280 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22281 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22282 }
22283
22284 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22285 SAVED_REGS_MASK shows which registers need to be restored.
22286
22287 Unfortunately, since this insn does not reflect very well the actual
22288 semantics of the operation, we need to annotate the insn for the benefit
22289 of DWARF2 frame unwind information. */
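/* For example, with SAVED_REGS_MASK covering {r4, r5, pc} the parallel
   built below holds a return, a 12-byte SP increment and three register
   loads, and is typically printed as "pop {r4, r5, pc}" (illustrative;
   the exact form depends on the matching pattern).  */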
22290 static void
22291 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22292 {
22293 int num_regs = 0;
22294 int i, j;
22295 rtx par;
22296 rtx dwarf = NULL_RTX;
22297 rtx tmp, reg;
22298 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22299 int offset_adj;
22300 int emit_update;
22301
22302 offset_adj = return_in_pc ? 1 : 0;
22303 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22304 if (saved_regs_mask & (1 << i))
22305 num_regs++;
22306
22307 gcc_assert (num_regs && num_regs <= 16);
22308
22309 /* If SP is in the reglist, then we don't emit an SP update insn. */
22310 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22311
22312 /* The parallel needs to hold num_regs SETs
22313 and one SET for the stack update. */
22314 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22315
22316 if (return_in_pc)
22317 XVECEXP (par, 0, 0) = ret_rtx;
22318
22319 if (emit_update)
22320 {
22321 /* Increment the stack pointer, based on there being
22322 num_regs 4-byte registers to restore. */
22323 tmp = gen_rtx_SET (stack_pointer_rtx,
22324 plus_constant (Pmode,
22325 stack_pointer_rtx,
22326 4 * num_regs));
22327 RTX_FRAME_RELATED_P (tmp) = 1;
22328 XVECEXP (par, 0, offset_adj) = tmp;
22329 }
22330
22331 /* Now restore every reg, which may include PC. */
22332 for (j = 0, i = 0; j < num_regs; i++)
22333 if (saved_regs_mask & (1 << i))
22334 {
22335 reg = gen_rtx_REG (SImode, i);
22336 if ((num_regs == 1) && emit_update && !return_in_pc)
22337 {
22338 /* Emit single load with writeback. */
22339 tmp = gen_frame_mem (SImode,
22340 gen_rtx_POST_INC (Pmode,
22341 stack_pointer_rtx));
22342 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22343 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22344 return;
22345 }
22346
22347 tmp = gen_rtx_SET (reg,
22348 gen_frame_mem
22349 (SImode,
22350 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22351 RTX_FRAME_RELATED_P (tmp) = 1;
22352 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22353
22354 /* We need to maintain a sequence for DWARF info too. As dwarf info
22355 should not have PC, skip PC. */
22356 if (i != PC_REGNUM)
22357 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22358
22359 j++;
22360 }
22361
22362 if (return_in_pc)
22363 par = emit_jump_insn (par);
22364 else
22365 par = emit_insn (par);
22366
22367 REG_NOTES (par) = dwarf;
22368 if (!return_in_pc)
22369 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22370 stack_pointer_rtx, stack_pointer_rtx);
22371 }
22372
22373 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22374 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22375
22376 Unfortunately, since this insn does not reflect very well the actual
22377 semantics of the operation, we need to annotate the insn for the benefit
22378 of DWARF2 frame unwind information. */
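/* As an illustration, restoring d8-d11 with BASE_REG being the stack
   pointer pops the four D registers and advances the base by 32 bytes
   in one go, much like "vldm sp!, {d8-d11}" (8 bytes per D register).  */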
22379 static void
22380 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22381 {
22382 int i, j;
22383 rtx par;
22384 rtx dwarf = NULL_RTX;
22385 rtx tmp, reg;
22386
22387 gcc_assert (num_regs && num_regs <= 32);
22388
22389 /* Workaround ARM10 VFPr1 bug. */
22390 if (num_regs == 2 && !arm_arch6)
22391 {
22392 if (first_reg == 15)
22393 first_reg--;
22394
22395 num_regs++;
22396 }
22397
22398 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22399 there could be up to 32 D-registers to restore.
22400 If there are more than 16 D-registers, make two recursive calls,
22401 each of which emits one pop_multi instruction. */
22402 if (num_regs > 16)
22403 {
22404 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22405 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22406 return;
22407 }
22408
22409 /* The parallel needs to hold num_regs SETs
22410 and one SET for the stack update. */
22411 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22412
22413 /* Increment the stack pointer, based on there being
22414 num_regs 8-byte registers to restore. */
22415 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22416 RTX_FRAME_RELATED_P (tmp) = 1;
22417 XVECEXP (par, 0, 0) = tmp;
22418
22419 /* Now show every reg that will be restored, using a SET for each. */
22420 for (j = 0, i=first_reg; j < num_regs; i += 2)
22421 {
22422 reg = gen_rtx_REG (DFmode, i);
22423
22424 tmp = gen_rtx_SET (reg,
22425 gen_frame_mem
22426 (DFmode,
22427 plus_constant (Pmode, base_reg, 8 * j)));
22428 RTX_FRAME_RELATED_P (tmp) = 1;
22429 XVECEXP (par, 0, j + 1) = tmp;
22430
22431 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22432
22433 j++;
22434 }
22435
22436 par = emit_insn (par);
22437 REG_NOTES (par) = dwarf;
22438
22439 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22440 if (REGNO (base_reg) == IP_REGNUM)
22441 {
22442 RTX_FRAME_RELATED_P (par) = 1;
22443 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22444 }
22445 else
22446 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22447 base_reg, base_reg);
22448 }
22449
22450 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22451 an even number of registers is being popped, multiple LDRD patterns are
22452 created for all register pairs. If an odd number of registers is popped,
22453 the last register is loaded using an LDR pattern. */
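/* An illustrative sketch of the result for {r4, r5, r6, r7, pc}:

     ldrd    r4, r5, [sp]
     ldrd    r6, r7, [sp, #8]
     add     sp, sp, #16
     ldr     pc, [sp], #4

   Thumb-2 LDRD does not require consecutive target registers, so the
   popped registers are simply paired in mask order.  */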
22454 static void
22455 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22456 {
22457 int num_regs = 0;
22458 int i, j;
22459 rtx par = NULL_RTX;
22460 rtx dwarf = NULL_RTX;
22461 rtx tmp, reg, tmp1;
22462 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22463
22464 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22465 if (saved_regs_mask & (1 << i))
22466 num_regs++;
22467
22468 gcc_assert (num_regs && num_regs <= 16);
22469
22470 /* We cannot generate LDRD for PC, so reduce the count if PC is to
22471 be popped. If num_regs was even it now becomes odd and we can
22472 generate a pop that includes PC; if it was odd it becomes even
22473 and an LDR with return can be generated for PC. */
22474 if (return_in_pc)
22475 num_regs--;
22476
22477 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22478
22479 /* Var j iterates over all the registers in saved_regs_mask, while var i
22480 gives the index of each saved register within the stack frame. A PARALLEL
22481 RTX holding a register pair is created here so that the LDRD pattern can
22482 be matched. As PC is always the last register to be popped, and num_regs
22483 has already been decremented if PC is present, we don't have to worry
22484 about PC in this loop. */
22485 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22486 if (saved_regs_mask & (1 << j))
22487 {
22488 /* Create RTX for memory load. */
22489 reg = gen_rtx_REG (SImode, j);
22490 tmp = gen_rtx_SET (reg,
22491 gen_frame_mem (SImode,
22492 plus_constant (Pmode,
22493 stack_pointer_rtx, 4 * i)));
22494 RTX_FRAME_RELATED_P (tmp) = 1;
22495
22496 if (i % 2 == 0)
22497 {
22498 /* When saved-register index (i) is even, the RTX to be emitted is
22499 yet to be created. Hence create it first. The LDRD pattern we
22500 are generating is :
22501 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22502 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22503 where target registers need not be consecutive. */
22504 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22505 dwarf = NULL_RTX;
22506 }
22507
22508 /* The ith register is added to the PARALLEL RTX: if i is even it becomes
22509 the 0th element, and if i is odd it becomes the 1st element of the LDRD
22510 pattern shown above. */
22511 XVECEXP (par, 0, (i % 2)) = tmp;
22512 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22513
22514 if ((i % 2) == 1)
22515 {
22516 /* When the saved-register index (i) is odd, RTXs for both registers of
22517 the LDRD pattern shown above have been generated, and the pattern can
22518 be emitted now. */
22519 par = emit_insn (par);
22520 REG_NOTES (par) = dwarf;
22521 RTX_FRAME_RELATED_P (par) = 1;
22522 }
22523
22524 i++;
22525 }
22526
22527 /* If num_regs is odd and return_in_pc is false, or num_regs is even and
22528 return_in_pc is true, one register remains to be popped. It is loaded
22529 using LDR and may be PC. Hence, adjust the stack first and then use
22530 LDR with post increment. */
22531
22532 /* Increment the stack pointer, based on there being
22533 num_regs 4-byte registers to restore. */
22534 tmp = gen_rtx_SET (stack_pointer_rtx,
22535 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22536 RTX_FRAME_RELATED_P (tmp) = 1;
22537 tmp = emit_insn (tmp);
22538 if (!return_in_pc)
22539 {
22540 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22541 stack_pointer_rtx, stack_pointer_rtx);
22542 }
22543
22544 dwarf = NULL_RTX;
22545
22546 if (((num_regs % 2) == 1 && !return_in_pc)
22547 || ((num_regs % 2) == 0 && return_in_pc))
22548 {
22549 /* Scan for the single register to be popped. Skip until the saved
22550 register is found. */
22551 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22552
22553 /* Gen LDR with post increment here. */
22554 tmp1 = gen_rtx_MEM (SImode,
22555 gen_rtx_POST_INC (SImode,
22556 stack_pointer_rtx));
22557 set_mem_alias_set (tmp1, get_frame_alias_set ());
22558
22559 reg = gen_rtx_REG (SImode, j);
22560 tmp = gen_rtx_SET (reg, tmp1);
22561 RTX_FRAME_RELATED_P (tmp) = 1;
22562 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22563
22564 if (return_in_pc)
22565 {
22566 /* If return_in_pc, j must be PC_REGNUM. */
22567 gcc_assert (j == PC_REGNUM);
22568 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22569 XVECEXP (par, 0, 0) = ret_rtx;
22570 XVECEXP (par, 0, 1) = tmp;
22571 par = emit_jump_insn (par);
22572 }
22573 else
22574 {
22575 par = emit_insn (tmp);
22576 REG_NOTES (par) = dwarf;
22577 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22578 stack_pointer_rtx, stack_pointer_rtx);
22579 }
22580
22581 }
22582 else if ((num_regs % 2) == 1 && return_in_pc)
22583 {
22584 /* Two registers are left to pop (one core register plus PC). Generate
22585 the pattern pop_multiple_with_stack_update_and_return to pop into PC. */
22586 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22587 }
22588
22589 return;
22590 }
22591
22592 /* LDRD in ARM mode needs consecutive registers as operands. This function
22593 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22594 offset addressing and then generates one separate stack update. This provides
22595 more scheduling freedom, compared to writeback on every load. However,
22596 if the function returns using load into PC directly
22597 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22598 before the last load. TODO: Add a peephole optimization to recognize
22599 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22600 peephole optimization to merge the load at stack-offset zero
22601 with the stack update instruction using load with writeback
22602 in post-index addressing mode. */
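/* An illustrative sketch of the result for {r4, r5, r6, pc}:

     ldrd    r4, r5, [sp]
     ldr     r6, [sp, #8]
     add     sp, sp, #12
     ldr     pc, [sp], #4

   r6 has no consecutive partner in the mask, so it is loaded with a
   plain LDR, and the final load into PC doubles as the return.  */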
22603 static void
22604 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22605 {
22606 int j = 0;
22607 int offset = 0;
22608 rtx par = NULL_RTX;
22609 rtx dwarf = NULL_RTX;
22610 rtx tmp, mem;
22611
22612 /* Restore saved registers. */
22613 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22614 j = 0;
22615 while (j <= LAST_ARM_REGNUM)
22616 if (saved_regs_mask & (1 << j))
22617 {
22618 if ((j % 2) == 0
22619 && (saved_regs_mask & (1 << (j + 1)))
22620 && (j + 1) != PC_REGNUM)
22621 {
22622 /* Current register and next register form register pair for which
22623 LDRD can be generated. PC is always the last register popped, and
22624 we handle it separately. */
22625 if (offset > 0)
22626 mem = gen_frame_mem (DImode,
22627 plus_constant (Pmode,
22628 stack_pointer_rtx,
22629 offset));
22630 else
22631 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22632
22633 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22634 tmp = emit_insn (tmp);
22635 RTX_FRAME_RELATED_P (tmp) = 1;
22636
22637 /* Generate dwarf info. */
22638
22639 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22640 gen_rtx_REG (SImode, j),
22641 NULL_RTX);
22642 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22643 gen_rtx_REG (SImode, j + 1),
22644 dwarf);
22645
22646 REG_NOTES (tmp) = dwarf;
22647
22648 offset += 8;
22649 j += 2;
22650 }
22651 else if (j != PC_REGNUM)
22652 {
22653 /* Emit a single word load. */
22654 if (offset > 0)
22655 mem = gen_frame_mem (SImode,
22656 plus_constant (Pmode,
22657 stack_pointer_rtx,
22658 offset));
22659 else
22660 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22661
22662 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22663 tmp = emit_insn (tmp);
22664 RTX_FRAME_RELATED_P (tmp) = 1;
22665
22666 /* Generate dwarf info. */
22667 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22668 gen_rtx_REG (SImode, j),
22669 NULL_RTX);
22670
22671 offset += 4;
22672 j += 1;
22673 }
22674 else /* j == PC_REGNUM */
22675 j++;
22676 }
22677 else
22678 j++;
22679
22680 /* Update the stack. */
22681 if (offset > 0)
22682 {
22683 tmp = gen_rtx_SET (stack_pointer_rtx,
22684 plus_constant (Pmode,
22685 stack_pointer_rtx,
22686 offset));
22687 tmp = emit_insn (tmp);
22688 arm_add_cfa_adjust_cfa_note (tmp, offset,
22689 stack_pointer_rtx, stack_pointer_rtx);
22690 offset = 0;
22691 }
22692
22693 if (saved_regs_mask & (1 << PC_REGNUM))
22694 {
22695 /* Only PC is to be popped. */
22696 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22697 XVECEXP (par, 0, 0) = ret_rtx;
22698 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22699 gen_frame_mem (SImode,
22700 gen_rtx_POST_INC (SImode,
22701 stack_pointer_rtx)));
22702 RTX_FRAME_RELATED_P (tmp) = 1;
22703 XVECEXP (par, 0, 1) = tmp;
22704 par = emit_jump_insn (par);
22705
22706 /* Generate dwarf info. */
22707 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22708 gen_rtx_REG (SImode, PC_REGNUM),
22709 NULL_RTX);
22710 REG_NOTES (par) = dwarf;
22711 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22712 stack_pointer_rtx, stack_pointer_rtx);
22713 }
22714 }
22715
22716 /* Calculate the size of the return value that is passed in registers. */
22717 static unsigned
22718 arm_size_return_regs (void)
22719 {
22720 machine_mode mode;
22721
22722 if (crtl->return_rtx != 0)
22723 mode = GET_MODE (crtl->return_rtx);
22724 else
22725 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22726
22727 return GET_MODE_SIZE (mode);
22728 }
22729
22730 /* Return true if the current function needs to save/restore LR. */
22731 static bool
22732 thumb_force_lr_save (void)
22733 {
22734 return !cfun->machine->lr_save_eliminated
22735 && (!crtl->is_leaf
22736 || thumb_far_jump_used_p ()
22737 || df_regs_ever_live_p (LR_REGNUM));
22738 }
22739
22740 /* We do not know whether r3 will be available, because
22741 an indirect tail call is happening in this
22742 particular case. */
22743 static bool
22744 is_indirect_tailcall_p (rtx call)
22745 {
22746 rtx pat = PATTERN (call);
22747
22748 /* Indirect tail call. */
22749 pat = XVECEXP (pat, 0, 0);
22750 if (GET_CODE (pat) == SET)
22751 pat = SET_SRC (pat);
22752
22753 pat = XEXP (XEXP (pat, 0), 0);
22754 return REG_P (pat);
22755 }
22756
22757 /* Return true if r3 is used by any of the tail call insns in the
22758 current function. */
22759 static bool
22760 any_sibcall_could_use_r3 (void)
22761 {
22762 edge_iterator ei;
22763 edge e;
22764
22765 if (!crtl->tail_call_emit)
22766 return false;
22767 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22768 if (e->flags & EDGE_SIBCALL)
22769 {
22770 rtx_insn *call = BB_END (e->src);
22771 if (!CALL_P (call))
22772 call = prev_nonnote_nondebug_insn (call);
22773 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22774 if (find_regno_fusage (call, USE, 3)
22775 || is_indirect_tailcall_p (call))
22776 return true;
22777 }
22778 return false;
22779 }
22780
22781
22782 /* Compute the distance from register FROM to register TO.
22783 These can be the arg pointer (26), the soft frame pointer (25),
22784 the stack pointer (13) or the hard frame pointer (11).
22785 In thumb mode r7 is used as the soft frame pointer, if needed.
22786 Typical stack layout looks like this:
22787
22788     old stack pointer -> |    |
22789                           ----
22790                          |    | \
22791                          |    |   saved arguments for
22792                          |    |   vararg functions
22793                          |    | /
22794                            --
22795 hard FP & arg pointer -> |    | \
22796                          |    |   stack
22797                          |    |   frame
22798                          |    | /
22799                            --
22800                          |    | \
22801                          |    |   call saved
22802                          |    |   registers
22803    soft frame pointer -> |    | /
22804                            --
22805                          |    | \
22806                          |    |   local
22807                          |    |   variables
22808   locals base pointer -> |    | /
22809                            --
22810                          |    | \
22811                          |    |   outgoing
22812                          |    |   arguments
22813 current stack pointer -> |    | /
22814                            --
22815
22816 For a given function some or all of these stack components
22817 may not be needed, giving rise to the possibility of
22818 eliminating some of the registers.
22819
22820 The values returned by this function must reflect the behavior
22821 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22822
22823 The sign of the number returned reflects the direction of stack
22824 growth, so the values are positive for all eliminations except
22825 from the soft frame pointer to the hard frame pointer.
22826
22827 SFP may point just inside the local variables block to ensure correct
22828 alignment. */
22829
22830
22831 /* Return cached stack offsets. */
22832
22833 static arm_stack_offsets *
22834 arm_get_frame_offsets (void)
22835 {
22836 struct arm_stack_offsets *offsets;
22837
22838 offsets = &cfun->machine->stack_offsets;
22839
22840 return offsets;
22841 }
22842
22843
22844 /* Calculate stack offsets. These are used to calculate register elimination
22845 offsets and in prologue/epilogue code. Also calculates which registers
22846 should be saved. */
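/* A worked example (illustrative, assuming no pretend args, no static
   chain, no caller-interworking slot and no VFP or iWMMXt saves): a
   32-bit function that saves {r4, r5, lr}, has 8 bytes of locals and
   no outgoing arguments gets saved_regs = 12, soft_frame rounded up to
   16 for doubleword alignment, locals_base = 24 and
   outgoing_args = 24.  */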
22847
22848 static void
22849 arm_compute_frame_layout (void)
22850 {
22851 struct arm_stack_offsets *offsets;
22852 unsigned long func_type;
22853 int saved;
22854 int core_saved;
22855 HOST_WIDE_INT frame_size;
22856 int i;
22857
22858 offsets = &cfun->machine->stack_offsets;
22859
22860 /* Initially this is the size of the local variables. It will be translated
22861 into an offset once we have determined the size of preceding data. */
22862 frame_size = ROUND_UP_WORD (get_frame_size ());
22863
22864 /* Space for variadic functions. */
22865 offsets->saved_args = crtl->args.pretend_args_size;
22866
22867 /* In Thumb mode this is incorrect, but never used. */
22868 offsets->frame
22869 = (offsets->saved_args
22870 + arm_compute_static_chain_stack_bytes ()
22871 + (frame_pointer_needed ? 4 : 0));
22872
22873 if (TARGET_32BIT)
22874 {
22875 unsigned int regno;
22876
22877 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22878 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22879 saved = core_saved;
22880
22881 /* We know that SP will be doubleword aligned on entry, and we must
22882 preserve that condition at any subroutine call. We also require the
22883 soft frame pointer to be doubleword aligned. */
22884
22885 if (TARGET_REALLY_IWMMXT)
22886 {
22887 /* Check for the call-saved iWMMXt registers. */
22888 for (regno = FIRST_IWMMXT_REGNUM;
22889 regno <= LAST_IWMMXT_REGNUM;
22890 regno++)
22891 if (reg_needs_saving_p (regno))
22892 saved += 8;
22893 }
22894
22895 func_type = arm_current_func_type ();
22896 /* Space for saved VFP registers. */
22897 if (! IS_VOLATILE (func_type)
22898 && TARGET_VFP_BASE)
22899 saved += arm_get_vfp_saved_size ();
22900
22901 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22902 nonsecure entry functions with VSTR/VLDR. */
22903 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22904 saved += 4;
22905 }
22906 else /* TARGET_THUMB1 */
22907 {
22908 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22909 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22910 saved = core_saved;
22911 if (TARGET_BACKTRACE)
22912 saved += 16;
22913 }
22914
22915 /* Saved registers include the stack frame. */
22916 offsets->saved_regs
22917 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22918 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22919
22920 /* A leaf function does not need any stack alignment if it has nothing
22921 on the stack. */
22922 if (crtl->is_leaf && frame_size == 0
22923 /* However if it calls alloca(), we have a dynamically allocated
22924 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22925 && ! cfun->calls_alloca)
22926 {
22927 offsets->outgoing_args = offsets->soft_frame;
22928 offsets->locals_base = offsets->soft_frame;
22929 return;
22930 }
22931
22932 /* Ensure SFP has the correct alignment. */
22933 if (ARM_DOUBLEWORD_ALIGN
22934 && (offsets->soft_frame & 7))
22935 {
22936 offsets->soft_frame += 4;
22937 /* Try to align stack by pushing an extra reg. Don't bother doing this
22938 when there is a stack frame as the alignment will be rolled into
22939 the normal stack adjustment. */
22940 if (frame_size + crtl->outgoing_args_size == 0)
22941 {
22942 int reg = -1;
22943
22944 /* Register r3 is caller-saved. Normally it does not need to be
22945 saved on entry by the prologue. However if we choose to save
22946 it for padding then we may confuse the compiler into thinking
22947 a prologue sequence is required when in fact it is not. This
22948 will occur when shrink-wrapping if r3 is used as a scratch
22949 register and there are no other callee-saved writes.
22950
22951 This situation can be avoided when other callee-saved registers
22952 are available and r3 is not mandatory if we choose a callee-saved
22953 register for padding. */
22954 bool prefer_callee_reg_p = false;
22955
22956 /* If it is safe to use r3, then do so. This sometimes
22957 generates better code on Thumb-2 by avoiding the need to
22958 use 32-bit push/pop instructions. */
22959 if (! any_sibcall_could_use_r3 ()
22960 && arm_size_return_regs () <= 12
22961 && (offsets->saved_regs_mask & (1 << 3)) == 0
22962 && (TARGET_THUMB2
22963 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22964 {
22965 reg = 3;
22966 if (!TARGET_THUMB2)
22967 prefer_callee_reg_p = true;
22968 }
22969 if (reg == -1
22970 || prefer_callee_reg_p)
22971 {
22972 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22973 {
22974 /* Avoid fixed registers; they may be changed at
22975 arbitrary times so it's unsafe to restore them
22976 during the epilogue. */
22977 if (!fixed_regs[i]
22978 && (offsets->saved_regs_mask & (1 << i)) == 0)
22979 {
22980 reg = i;
22981 break;
22982 }
22983 }
22984 }
22985
22986 if (reg != -1)
22987 {
22988 offsets->saved_regs += 4;
22989 offsets->saved_regs_mask |= (1 << reg);
22990 }
22991 }
22992 }
22993
22994 offsets->locals_base = offsets->soft_frame + frame_size;
22995 offsets->outgoing_args = (offsets->locals_base
22996 + crtl->outgoing_args_size);
22997
22998 if (ARM_DOUBLEWORD_ALIGN)
22999 {
23000 /* Ensure SP remains doubleword aligned. */
23001 if (offsets->outgoing_args & 7)
23002 offsets->outgoing_args += 4;
23003 gcc_assert (!(offsets->outgoing_args & 7));
23004 }
23005 }
23006
23007
23008 /* Calculate the relative offsets for the different stack pointers. Positive
23009 offsets are in the direction of stack growth. */
23010
23011 HOST_WIDE_INT
23012 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23013 {
23014 arm_stack_offsets *offsets;
23015
23016 offsets = arm_get_frame_offsets ();
23017
23018 /* OK, now we have enough information to compute the distances.
23019 There must be an entry in these switch tables for each pair
23020 of registers in ELIMINABLE_REGS, even if some of the entries
23021 seem to be redundant or useless. */
23022 switch (from)
23023 {
23024 case ARG_POINTER_REGNUM:
23025 switch (to)
23026 {
23027 case THUMB_HARD_FRAME_POINTER_REGNUM:
23028 return 0;
23029
23030 case FRAME_POINTER_REGNUM:
23031 /* This is the reverse of the soft frame pointer
23032 to hard frame pointer elimination below. */
23033 return offsets->soft_frame - offsets->saved_args;
23034
23035 case ARM_HARD_FRAME_POINTER_REGNUM:
23036 /* This is only non-zero in the case where the static chain register
23037 is stored above the frame. */
23038 return offsets->frame - offsets->saved_args - 4;
23039
23040 case STACK_POINTER_REGNUM:
23041 /* If nothing has been pushed on the stack at all
23042 then this will return -4. This *is* correct! */
23043 return offsets->outgoing_args - (offsets->saved_args + 4);
23044
23045 default:
23046 gcc_unreachable ();
23047 }
23048 gcc_unreachable ();
23049
23050 case FRAME_POINTER_REGNUM:
23051 switch (to)
23052 {
23053 case THUMB_HARD_FRAME_POINTER_REGNUM:
23054 return 0;
23055
23056 case ARM_HARD_FRAME_POINTER_REGNUM:
23057 /* The hard frame pointer points to the top entry in the
23058 stack frame. The soft frame pointer to the bottom entry
23059 in the stack frame. If there is no stack frame at all,
23060 then they are identical. */
23061
23062 return offsets->frame - offsets->soft_frame;
23063
23064 case STACK_POINTER_REGNUM:
23065 return offsets->outgoing_args - offsets->soft_frame;
23066
23067 default:
23068 gcc_unreachable ();
23069 }
23070 gcc_unreachable ();
23071
23072 default:
23073 /* You cannot eliminate from the stack pointer.
23074 In theory you could eliminate from the hard frame
23075 pointer to the stack pointer, but this will never
23076 happen, since if a stack frame is not needed the
23077 hard frame pointer will never be used. */
23078 gcc_unreachable ();
23079 }
23080 }
23081
23082 /* Given FROM and TO register numbers, say whether this elimination is
23083 allowed. Frame pointer elimination is automatically handled.
23084
23085 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23086 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23087 pointer, we must eliminate FRAME_POINTER_REGNUM into
23088 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23089 ARG_POINTER_REGNUM. */
23090
23091 bool
23092 arm_can_eliminate (const int from, const int to)
23093 {
23094 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23095 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23096 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23097 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23098 true);
23099 }
23100
23101 /* Emit RTL to save coprocessor registers on function entry. Returns the
23102 number of bytes pushed. */
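/* For instance, if d8-d15 need saving and nothing else does, the VFP
   loop below finds one contiguous run and emits a single
   store-multiple of eight D registers via vfp_emit_fstmd, adding 64 to
   the returned size.  */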
23103
23104 static int
23105 arm_save_coproc_regs(void)
23106 {
23107 int saved_size = 0;
23108 unsigned reg;
23109 unsigned start_reg;
23110 rtx insn;
23111
23112 if (TARGET_REALLY_IWMMXT)
23113 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23114 if (reg_needs_saving_p (reg))
23115 {
23116 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23117 insn = gen_rtx_MEM (V2SImode, insn);
23118 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23119 RTX_FRAME_RELATED_P (insn) = 1;
23120 saved_size += 8;
23121 }
23122
23123 if (TARGET_VFP_BASE)
23124 {
23125 start_reg = FIRST_VFP_REGNUM;
23126
23127 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23128 {
23129 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23130 {
23131 if (start_reg != reg)
23132 saved_size += vfp_emit_fstmd (start_reg,
23133 (reg - start_reg) / 2);
23134 start_reg = reg + 2;
23135 }
23136 }
23137 if (start_reg != reg)
23138 saved_size += vfp_emit_fstmd (start_reg,
23139 (reg - start_reg) / 2);
23140 }
23141 return saved_size;
23142 }
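
/* A minimal standalone sketch (not part of the build) of how the VFP loop
   above coalesces consecutive D-register pairs into runs, each of which
   would be stored by a single vfp_emit_fstmd call.  The bitmap contents
   and the register numbering (starting at 0) are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int needs_saving[32] = { 0 };	/* One entry per S register.  */
  int reg, start_reg = 0, nregs = 32;

  /* Pretend d8-d11 (s16-s23) and d14-d15 (s28-s31) need saving.  */
  for (reg = 16; reg <= 23; reg++)
    needs_saving[reg] = 1;
  for (reg = 28; reg <= 31; reg++)
    needs_saving[reg] = 1;

  for (reg = 0; reg < nregs; reg += 2)
    if (!needs_saving[reg] && !needs_saving[reg + 1])
      {
	if (start_reg != reg)
	  printf ("store run starting at s%d, %d D registers\n",
		  start_reg, (reg - start_reg) / 2);
	start_reg = reg + 2;
      }
  if (start_reg != reg)
    printf ("store run starting at s%d, %d D registers\n",
	    start_reg, (reg - start_reg) / 2);
  return 0;
}
#endif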
23143
23144
23145 /* Set the Thumb frame pointer from the stack pointer. */
23146
23147 static void
23148 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23149 {
23150 HOST_WIDE_INT amount;
23151 rtx insn, dwarf;
23152
23153 amount = offsets->outgoing_args - offsets->locals_base;
23154 if (amount < 1024)
23155 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23156 stack_pointer_rtx, GEN_INT (amount)));
23157 else
23158 {
23159 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23160 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23161 expects the first two operands to be the same. */
23162 if (TARGET_THUMB2)
23163 {
23164 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23165 stack_pointer_rtx,
23166 hard_frame_pointer_rtx));
23167 }
23168 else
23169 {
23170 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23171 hard_frame_pointer_rtx,
23172 stack_pointer_rtx));
23173 }
23174 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23175 plus_constant (Pmode, stack_pointer_rtx, amount));
23176 RTX_FRAME_RELATED_P (dwarf) = 1;
23177 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23178 }
23179
23180 RTX_FRAME_RELATED_P (insn) = 1;
23181 }
23182
23183 struct scratch_reg {
23184 rtx reg;
23185 bool saved;
23186 };
23187
23188 /* Return a short-lived scratch register for use as a 2nd scratch register on
23189 function entry after the registers are saved in the prologue. This register
23190 must be released by means of release_scratch_register_on_entry. IP is not
23191 considered since it is always used as the 1st scratch register if available.
23192
23193 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23194 mask of live registers. */
23195
23196 static void
23197 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23198 unsigned long live_regs)
23199 {
23200 int regno = -1;
23201
23202 sr->saved = false;
23203
23204 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23205 regno = LR_REGNUM;
23206 else
23207 {
23208 unsigned int i;
23209
23210 for (i = 4; i < 11; i++)
23211 if (regno1 != i && (live_regs & (1 << i)) != 0)
23212 {
23213 regno = i;
23214 break;
23215 }
23216
23217 if (regno < 0)
23218 {
23219 /* If IP is used as the 1st scratch register for a nested function,
23220 then either r3 wasn't available or is used to preserve IP. */
23221 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23222 regno1 = 3;
23223 regno = (regno1 == 3 ? 2 : 3);
23224 sr->saved
23225 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23226 regno);
23227 }
23228 }
23229
23230 sr->reg = gen_rtx_REG (SImode, regno);
23231 if (sr->saved)
23232 {
23233 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23234 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23235 rtx x = gen_rtx_SET (stack_pointer_rtx,
23236 plus_constant (Pmode, stack_pointer_rtx, -4));
23237 RTX_FRAME_RELATED_P (insn) = 1;
23238 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23239 }
23240 }
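
/* A minimal standalone sketch (not part of the build) of the selection
   order implemented above: prefer LR if it is already being pushed, then
   the first available register in r4-r10, and only then fall back to
   r2/r3 (which may have to be spilled around its use).  The register
   numbers, masks and main () wrapper are illustrative assumptions.  */
#if 0
#include <stdio.h>

#define SKETCH_LR 14
#define SKETCH_IP 12

static int
sketch_pick_scratch (unsigned int regno1, unsigned long live_regs,
		     int is_nested)
{
  if (regno1 != SKETCH_LR && (live_regs & (1ul << SKETCH_LR)) != 0)
    return SKETCH_LR;

  for (unsigned int i = 4; i < 11; i++)
    if (regno1 != i && (live_regs & (1ul << i)) != 0)
      return i;

  if (regno1 == SKETCH_IP && is_nested)
    regno1 = 3;
  return regno1 == 3 ? 2 : 3;	/* May need saving on the stack.  */
}

int
main (void)
{
  /* Neither LR nor r4-r10 in the mask: fall back to r3.  */
  printf ("scratch = r%d\n", sketch_pick_scratch (SKETCH_IP, 0x000f, 0));
  /* r5 and r6 in the mask: pick r5.  */
  printf ("scratch = r%d\n", sketch_pick_scratch (SKETCH_IP, 0x0060, 0));
  /* LR in the mask: pick lr (r14).  */
  printf ("scratch = r%d\n",
	  sketch_pick_scratch (SKETCH_IP, 1ul << SKETCH_LR, 0));
  return 0;
}
#endif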
23241
23242 /* Release a scratch register obtained from the preceding function. */
23243
23244 static void
23245 release_scratch_register_on_entry (struct scratch_reg *sr)
23246 {
23247 if (sr->saved)
23248 {
23249 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23250 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23251 rtx x = gen_rtx_SET (stack_pointer_rtx,
23252 plus_constant (Pmode, stack_pointer_rtx, 4));
23253 RTX_FRAME_RELATED_P (insn) = 1;
23254 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23255 }
23256 }
23257
23258 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23259
23260 #if PROBE_INTERVAL > 4096
23261 #error Cannot use indexed addressing mode for stack probing
23262 #endif
23263
23264 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23265 inclusive. These are offsets from the current stack pointer. REGNO1
23266 is the index number of the 1st scratch register and LIVE_REGS is the
23267 mask of live registers. */
23268
23269 static void
23270 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23271 unsigned int regno1, unsigned long live_regs)
23272 {
23273 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23274
23275 /* See if we have a constant small number of probes to generate. If so,
23276 that's the easy case. */
23277 if (size <= PROBE_INTERVAL)
23278 {
23279 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23280 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23281 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23282 }
23283
23284 /* The run-time loop is made up of 10 insns in the generic case while the
23285 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
23286 else if (size <= 5 * PROBE_INTERVAL)
23287 {
23288 HOST_WIDE_INT i, rem;
23289
23290 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23291 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23292 emit_stack_probe (reg1);
23293
23294 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23295 it exceeds SIZE. If only two probes are needed, this will not
23296 generate any code. Then probe at FIRST + SIZE. */
23297 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23298 {
23299 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23300 emit_stack_probe (reg1);
23301 }
23302
23303 rem = size - (i - PROBE_INTERVAL);
23304 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23305 {
23306 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23307 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23308 }
23309 else
23310 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23311 }
23312
23313 /* Otherwise, do the same as above, but in a loop. Note that we must be
23314 extra careful with variables wrapping around because we might be at
23315 the very top (or the very bottom) of the address space and we have
23316 to be able to handle this case properly; in particular, we use an
23317 equality test for the loop condition. */
23318 else
23319 {
23320 HOST_WIDE_INT rounded_size;
23321 struct scratch_reg sr;
23322
23323 get_scratch_register_on_entry (&sr, regno1, live_regs);
23324
23325 emit_move_insn (reg1, GEN_INT (first));
23326
23327
23328 /* Step 1: round SIZE to the previous multiple of the interval. */
23329
23330 rounded_size = size & -PROBE_INTERVAL;
23331 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23332
23333
23334 /* Step 2: compute initial and final value of the loop counter. */
23335
23336 /* TEST_ADDR = SP + FIRST. */
23337 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23338
23339 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23340 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23341
23342
23343 /* Step 3: the loop
23344
23345 do
23346 {
23347 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23348 probe at TEST_ADDR
23349 }
23350 while (TEST_ADDR != LAST_ADDR)
23351
23352 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23353 until it is equal to ROUNDED_SIZE. */
23354
23355 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23356
23357
23358 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23359 that SIZE is equal to ROUNDED_SIZE. */
23360
23361 if (size != rounded_size)
23362 {
23363 HOST_WIDE_INT rem = size - rounded_size;
23364
23365 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23366 {
23367 emit_set_insn (sr.reg,
23368 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23369 emit_stack_probe (plus_constant (Pmode, sr.reg,
23370 PROBE_INTERVAL - rem));
23371 }
23372 else
23373 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23374 }
23375
23376 release_scratch_register_on_entry (&sr);
23377 }
23378
23379 /* Make sure nothing is scheduled before we are done. */
23380 emit_insn (gen_blockage ());
23381 }
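
/* A minimal standalone sketch (not part of the build) of the probing
   schedule produced above, whichever of the three strategies is used:
   the generated code touches FIRST + N * PROBE_INTERVAL for N = 1, 2, ...
   while that stays below FIRST + SIZE, and finally touches FIRST + SIZE.
   The interval (assuming the usual 4 KiB) and the sizes are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long probe_interval = 4096;  /* 1 << STACK_CHECK_PROBE_INTERVAL_EXP.  */
  const long first = 12288;	     /* Hypothetical protection area.  */
  const long size = 20000;	     /* Hypothetical frame size.  */
  long n;

  for (n = probe_interval; n < size; n += probe_interval)
    printf ("probe at sp - %ld\n", first + n);
  printf ("probe at sp - %ld\n", first + size);
  return 0;
}
#endif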
23382
23383 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23384 absolute addresses. */
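/* As a rough illustration (not authoritative output), assuming a 4 KiB
   probe interval and r4/r5 as the two scratch registers, the emitted
   sequence would look something like:

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */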
23385
23386 const char *
23387 output_probe_stack_range (rtx reg1, rtx reg2)
23388 {
23389 static int labelno = 0;
23390 char loop_lab[32];
23391 rtx xops[2];
23392
23393 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23394
23395 /* Loop. */
23396 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23397
23398 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23399 xops[0] = reg1;
23400 xops[1] = GEN_INT (PROBE_INTERVAL);
23401 output_asm_insn ("sub\t%0, %0, %1", xops);
23402
23403 /* Probe at TEST_ADDR. */
23404 output_asm_insn ("str\tr0, [%0, #0]", xops);
23405
23406 /* Test if TEST_ADDR == LAST_ADDR. */
23407 xops[1] = reg2;
23408 output_asm_insn ("cmp\t%0, %1", xops);
23409
23410 /* Branch. */
23411 fputs ("\tbne\t", asm_out_file);
23412 assemble_name_raw (asm_out_file, loop_lab);
23413 fputc ('\n', asm_out_file);
23414
23415 return "";
23416 }
23417
23418 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23419 function. */
23420 void
23421 arm_expand_prologue (void)
23422 {
23423 rtx amount;
23424 rtx insn;
23425 rtx ip_rtx;
23426 unsigned long live_regs_mask;
23427 unsigned long func_type;
23428 int fp_offset = 0;
23429 int saved_pretend_args = 0;
23430 int saved_regs = 0;
23431 unsigned HOST_WIDE_INT args_to_push;
23432 HOST_WIDE_INT size;
23433 arm_stack_offsets *offsets;
23434 bool clobber_ip;
23435
23436 func_type = arm_current_func_type ();
23437
23438 /* Naked functions don't have prologues. */
23439 if (IS_NAKED (func_type))
23440 {
23441 if (flag_stack_usage_info)
23442 current_function_static_stack_size = 0;
23443 return;
23444 }
23445
23446 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
23447 args_to_push = crtl->args.pretend_args_size;
23448
23449 /* Compute which register we will have to save onto the stack. */
23450 offsets = arm_get_frame_offsets ();
23451 live_regs_mask = offsets->saved_regs_mask;
23452
23453 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23454
23455 if (IS_STACKALIGN (func_type))
23456 {
23457 rtx r0, r1;
23458
23459 /* Handle a word-aligned stack pointer. We generate the following:
23460
23461 mov r0, sp
23462 bic r1, r0, #7
23463 mov sp, r1
23464 <save and restore r0 in normal prologue/epilogue>
23465 mov sp, r0
23466 bx lr
23467
23468 The unwinder doesn't need to know about the stack realignment.
23469 Just tell it we saved SP in r0. */
23470 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23471
23472 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23473 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23474
23475 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23476 RTX_FRAME_RELATED_P (insn) = 1;
23477 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23478
23479 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23480
23481 /* ??? The CFA changes here, which may cause GDB to conclude that it
23482 has entered a different function. That said, the unwind info is
23483 correct, individually, before and after this instruction because
23484 we've described the save of SP, which will override the default
23485 handling of SP as restoring from the CFA. */
23486 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23487 }
23488
23489 /* Let's compute the static_chain_stack_bytes required and store it. Right
23490 now the value must be -1 as stored by arm_init_machine_status (). */
23491 cfun->machine->static_chain_stack_bytes
23492 = arm_compute_static_chain_stack_bytes ();
23493
23494 /* The static chain register is the same as the IP register. If it is
23495 clobbered when creating the frame, we need to save and restore it. */
23496 clobber_ip = IS_NESTED (func_type)
23497 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23498 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23499 || flag_stack_clash_protection)
23500 && !df_regs_ever_live_p (LR_REGNUM)
23501 && arm_r3_live_at_start_p ()));
23502
23503 /* Find somewhere to store IP whilst the frame is being created.
23504 We try the following places in order:
23505
23506 1. The last argument register r3 if it is available.
23507 2. A slot on the stack above the frame if there are no
23508 arguments to push onto the stack.
23509 3. Register r3 again, after pushing the argument registers
23510 onto the stack, if this is a varargs function.
23511 4. The last slot on the stack created for the arguments to
23512 push, if this isn't a varargs function.
23513
23514 Note - we only need to tell the dwarf2 backend about the SP
23515 adjustment in the second variant; the static chain register
23516 doesn't need to be unwound, as it doesn't contain a value
23517 inherited from the caller. */
23518 if (clobber_ip)
23519 {
23520 if (!arm_r3_live_at_start_p ())
23521 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23522 else if (args_to_push == 0)
23523 {
23524 rtx addr, dwarf;
23525
23526 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23527 saved_regs += 4;
23528
23529 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23530 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23531 fp_offset = 4;
23532
23533 /* Just tell the dwarf backend that we adjusted SP. */
23534 dwarf = gen_rtx_SET (stack_pointer_rtx,
23535 plus_constant (Pmode, stack_pointer_rtx,
23536 -fp_offset));
23537 RTX_FRAME_RELATED_P (insn) = 1;
23538 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23539 }
23540 else
23541 {
23542 /* Store the args on the stack. */
23543 if (cfun->machine->uses_anonymous_args)
23544 {
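/* The mask (0xf0 >> (args_to_push / 4)) & 0xf picks out the highest
   argument registers: 4 bytes -> {r3}, 8 -> {r2, r3}, 12 -> {r1, r2, r3}
   and 16 -> {r0, r1, r2, r3}.  */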
23545 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23546 (0xf0 >> (args_to_push / 4)) & 0xf);
23547 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23548 saved_pretend_args = 1;
23549 }
23550 else
23551 {
23552 rtx addr, dwarf;
23553
23554 if (args_to_push == 4)
23555 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23556 else
23557 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23558 plus_constant (Pmode,
23559 stack_pointer_rtx,
23560 -args_to_push));
23561
23562 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23563
23564 /* Just tell the dwarf backend that we adjusted SP. */
23565 dwarf = gen_rtx_SET (stack_pointer_rtx,
23566 plus_constant (Pmode, stack_pointer_rtx,
23567 -args_to_push));
23568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23569 }
23570
23571 RTX_FRAME_RELATED_P (insn) = 1;
23572 fp_offset = args_to_push;
23573 args_to_push = 0;
23574 }
23575 }
23576
23577 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23578 {
23579 if (IS_INTERRUPT (func_type))
23580 {
23581 /* Interrupt functions must not corrupt any registers.
23582 Creating a frame pointer however, corrupts the IP
23583 register, so we must push it first. */
23584 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23585
23586 /* Do not set RTX_FRAME_RELATED_P on this insn.
23587 The dwarf stack unwinding code only wants to see one
23588 stack decrement per function, and this is not it. If
23589 this instruction is labeled as being part of the frame
23590 creation sequence then dwarf2out_frame_debug_expr will
23591 die when it encounters the assignment of IP to FP
23592 later on, since the use of SP here establishes SP as
23593 the CFA register and not IP.
23594
23595 Anyway this instruction is not really part of the stack
23596 frame creation although it is part of the prologue. */
23597 }
23598
23599 insn = emit_set_insn (ip_rtx,
23600 plus_constant (Pmode, stack_pointer_rtx,
23601 fp_offset));
23602 RTX_FRAME_RELATED_P (insn) = 1;
23603 }
23604
23605 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23606 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23607 {
23608 saved_regs += 4;
23609 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23610 GEN_INT (FPCXTNS_ENUM)));
23611 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23612 plus_constant (Pmode, stack_pointer_rtx, -4));
23613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23614 RTX_FRAME_RELATED_P (insn) = 1;
23615 }
23616
23617 if (args_to_push)
23618 {
23619 /* Push the argument registers, or reserve space for them. */
23620 if (cfun->machine->uses_anonymous_args)
23621 insn = emit_multi_reg_push
23622 ((0xf0 >> (args_to_push / 4)) & 0xf,
23623 (0xf0 >> (args_to_push / 4)) & 0xf);
23624 else
23625 insn = emit_insn
23626 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23627 GEN_INT (- args_to_push)));
23628 RTX_FRAME_RELATED_P (insn) = 1;
23629 }
23630
23631 /* If this is an interrupt service routine, and the link register
23632 is going to be pushed, and we're not generating an extra
23633 push of IP (needed when a frame pointer is required and the frame layout is APCS),
23634 subtracting four from LR now will mean that the function return
23635 can be done with a single instruction. */
23636 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23637 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23638 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23639 && TARGET_ARM)
23640 {
23641 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23642
23643 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23644 }
23645
23646 if (live_regs_mask)
23647 {
23648 unsigned long dwarf_regs_mask = live_regs_mask;
23649
23650 saved_regs += bit_count (live_regs_mask) * 4;
23651 if (optimize_size && !frame_pointer_needed
23652 && saved_regs == offsets->saved_regs - offsets->saved_args)
23653 {
23654 /* If no coprocessor registers are being pushed and we don't have
23655 to worry about a frame pointer then push extra registers to
23656 create the stack frame. This is done in a way that does not
23657 alter the frame layout, so is independent of the epilogue. */
23658 int n;
23659 int frame;
23660 n = 0;
23661 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23662 n++;
23663 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23664 if (frame && n * 4 >= frame)
23665 {
23666 n = frame / 4;
23667 live_regs_mask |= (1 << n) - 1;
23668 saved_regs += frame;
23669 }
23670 }
23671
23672 if (TARGET_LDRD
23673 && current_tune->prefer_ldrd_strd
23674 && !optimize_function_for_size_p (cfun))
23675 {
23676 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23677 if (TARGET_THUMB2)
23678 thumb2_emit_strd_push (live_regs_mask);
23679 else if (TARGET_ARM
23680 && !TARGET_APCS_FRAME
23681 && !IS_INTERRUPT (func_type))
23682 arm_emit_strd_push (live_regs_mask);
23683 else
23684 {
23685 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23686 RTX_FRAME_RELATED_P (insn) = 1;
23687 }
23688 }
23689 else
23690 {
23691 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23692 RTX_FRAME_RELATED_P (insn) = 1;
23693 }
23694 }
23695
23696 if (! IS_VOLATILE (func_type))
23697 saved_regs += arm_save_coproc_regs ();
23698
23699 if (frame_pointer_needed && TARGET_ARM)
23700 {
23701 /* Create the new frame pointer. */
23702 if (TARGET_APCS_FRAME)
23703 {
23704 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23706 RTX_FRAME_RELATED_P (insn) = 1;
23707 }
23708 else
23709 {
23710 insn = GEN_INT (saved_regs - (4 + fp_offset));
23711 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23712 stack_pointer_rtx, insn));
23713 RTX_FRAME_RELATED_P (insn) = 1;
23714 }
23715 }
23716
23717 size = offsets->outgoing_args - offsets->saved_args;
23718 if (flag_stack_usage_info)
23719 current_function_static_stack_size = size;
23720
23721 /* If this isn't an interrupt service routine and we have a frame, then do
23722 stack checking. We use IP as the first scratch register, except for the
23723 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23724 if (!IS_INTERRUPT (func_type)
23725 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23726 || flag_stack_clash_protection))
23727 {
23728 unsigned int regno;
23729
23730 if (!IS_NESTED (func_type) || clobber_ip)
23731 regno = IP_REGNUM;
23732 else if (df_regs_ever_live_p (LR_REGNUM))
23733 regno = LR_REGNUM;
23734 else
23735 regno = 3;
23736
23737 if (crtl->is_leaf && !cfun->calls_alloca)
23738 {
23739 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23740 arm_emit_probe_stack_range (get_stack_check_protect (),
23741 size - get_stack_check_protect (),
23742 regno, live_regs_mask);
23743 }
23744 else if (size > 0)
23745 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23746 regno, live_regs_mask);
23747 }
23748
23749 /* Recover the static chain register. */
23750 if (clobber_ip)
23751 {
23752 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23753 insn = gen_rtx_REG (SImode, 3);
23754 else
23755 {
23756 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23757 insn = gen_frame_mem (SImode, insn);
23758 }
23759 emit_set_insn (ip_rtx, insn);
23760 emit_insn (gen_force_register_use (ip_rtx));
23761 }
23762
23763 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23764 {
23765 /* This add can produce multiple insns for a large constant, so we
23766 need to get tricky. */
23767 rtx_insn *last = get_last_insn ();
23768
23769 amount = GEN_INT (offsets->saved_args + saved_regs
23770 - offsets->outgoing_args);
23771
23772 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23773 amount));
23774 do
23775 {
23776 last = last ? NEXT_INSN (last) : get_insns ();
23777 RTX_FRAME_RELATED_P (last) = 1;
23778 }
23779 while (last != insn);
23780
23781 /* If the frame pointer is needed, emit a special barrier that
23782 will prevent the scheduler from moving stores to the frame
23783 before the stack adjustment. */
23784 if (frame_pointer_needed)
23785 emit_insn (gen_stack_tie (stack_pointer_rtx,
23786 hard_frame_pointer_rtx));
23787 }
23788
23789
23790 if (frame_pointer_needed && TARGET_THUMB2)
23791 thumb_set_frame_pointer (offsets);
23792
23793 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23794 {
23795 unsigned long mask;
23796
23797 mask = live_regs_mask;
23798 mask &= THUMB2_WORK_REGS;
23799 if (!IS_NESTED (func_type))
23800 mask |= (1 << IP_REGNUM);
23801 arm_load_pic_register (mask, NULL_RTX);
23802 }
23803
23804 /* If we are profiling, make sure no instructions are scheduled before
23805 the call to mcount. Similarly if the user has requested no
23806 scheduling in the prolog. Similarly if we want non-call exceptions
23807 using the EABI unwinder, to prevent faulting instructions from being
23808 swapped with a stack adjustment. */
23809 if (crtl->profile || !TARGET_SCHED_PROLOG
23810 || (arm_except_unwind_info (&global_options) == UI_TARGET
23811 && cfun->can_throw_non_call_exceptions))
23812 emit_insn (gen_blockage ());
23813
23814 /* If the link register is being kept alive, with the return address in it,
23815 then make sure that it does not get reused by the ce2 pass. */
23816 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23817 cfun->machine->lr_save_eliminated = 1;
23818 }
23819 \f
23820 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23821 static void
23822 arm_print_condition (FILE *stream)
23823 {
23824 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23825 {
23826 /* Branch conversion is not implemented for Thumb-2. */
23827 if (TARGET_THUMB)
23828 {
23829 output_operand_lossage ("predicated Thumb instruction");
23830 return;
23831 }
23832 if (current_insn_predicate != NULL)
23833 {
23834 output_operand_lossage
23835 ("predicated instruction in conditional sequence");
23836 return;
23837 }
23838
23839 fputs (arm_condition_codes[arm_current_cc], stream);
23840 }
23841 else if (current_insn_predicate)
23842 {
23843 enum arm_cond_code code;
23844
23845 if (TARGET_THUMB1)
23846 {
23847 output_operand_lossage ("predicated Thumb instruction");
23848 return;
23849 }
23850
23851 code = get_arm_condition_code (current_insn_predicate);
23852 fputs (arm_condition_codes[code], stream);
23853 }
23854 }
23855
23856
23857 /* Globally reserved letters: acln
23858 Punctuation letters currently used: @_|?().!#
23859 Lower case letters currently used: bcdefhimpqtvwxyz
23860 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23861 Letters previously used, but now deprecated/obsolete: sWXYZ.
23862
23863 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23864
23865 If CODE is 'd', then the X is a condition operand and the instruction
23866 should only be executed if the condition is true.
23867 if CODE is 'D', then the X is a condition operand and the instruction
23868 should only be executed if the condition is false: however, if the mode
23869 of the comparison is CCFPEmode, then always execute the instruction -- we
23870 do this because in these circumstances !GE does not necessarily imply LT;
23871 in these cases the instruction pattern will take care to make sure that
23872 an instruction containing %d will follow, thereby undoing the effects of
23873 doing this instruction unconditionally.
23874 If CODE is 'N' then X is a floating point operand that must be negated
23875 before output.
23876 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23877 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23878 If CODE is 'V', then the operand must be a CONST_INT representing
23879 the bits to preserve in the modified register (Rd) of a BFI or BFC
23880 instruction: print out both the width and lsb (shift) fields. */
23881 static void
23882 arm_print_operand (FILE *stream, rtx x, int code)
23883 {
23884 switch (code)
23885 {
23886 case '@':
23887 fputs (ASM_COMMENT_START, stream);
23888 return;
23889
23890 case '_':
23891 fputs (user_label_prefix, stream);
23892 return;
23893
23894 case '|':
23895 fputs (REGISTER_PREFIX, stream);
23896 return;
23897
23898 case '?':
23899 arm_print_condition (stream);
23900 return;
23901
23902 case '.':
23903 /* The current condition code for a condition code setting instruction.
23904 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23905 fputc('s', stream);
23906 arm_print_condition (stream);
23907 return;
23908
23909 case '!':
23910 /* If the instruction is conditionally executed then print
23911 the current condition code, otherwise print 's'. */
23912 gcc_assert (TARGET_THUMB2);
23913 if (current_insn_predicate)
23914 arm_print_condition (stream);
23915 else
23916 fputc('s', stream);
23917 break;
23918
23919 /* %# is a "break" sequence. It doesn't output anything, but is used to
23920 separate e.g. operand numbers from following text, if that text consists
23921 of further digits which we don't want to be part of the operand
23922 number. */
23923 case '#':
23924 return;
23925
23926 case 'N':
23927 {
23928 REAL_VALUE_TYPE r;
23929 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23930 fprintf (stream, "%s", fp_const_from_val (&r));
23931 }
23932 return;
23933
23934 /* An integer or symbol address without a preceding # sign. */
23935 case 'c':
23936 switch (GET_CODE (x))
23937 {
23938 case CONST_INT:
23939 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23940 break;
23941
23942 case SYMBOL_REF:
23943 output_addr_const (stream, x);
23944 break;
23945
23946 case CONST:
23947 if (GET_CODE (XEXP (x, 0)) == PLUS
23948 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23949 {
23950 output_addr_const (stream, x);
23951 break;
23952 }
23953 /* Fall through. */
23954
23955 default:
23956 output_operand_lossage ("Unsupported operand for code '%c'", code);
23957 }
23958 return;
23959
23960 /* An integer that we want to print in HEX. */
23961 case 'x':
23962 switch (GET_CODE (x))
23963 {
23964 case CONST_INT:
23965 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23966 break;
23967
23968 default:
23969 output_operand_lossage ("Unsupported operand for code '%c'", code);
23970 }
23971 return;
23972
23973 case 'B':
23974 if (CONST_INT_P (x))
23975 {
23976 HOST_WIDE_INT val;
23977 val = ARM_SIGN_EXTEND (~INTVAL (x));
23978 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23979 }
23980 else
23981 {
23982 putc ('~', stream);
23983 output_addr_const (stream, x);
23984 }
23985 return;
23986
23987 case 'b':
23988 /* Print the log2 of a CONST_INT. */
23989 {
23990 HOST_WIDE_INT val;
23991
23992 if (!CONST_INT_P (x)
23993 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23994 output_operand_lossage ("Unsupported operand for code '%c'", code);
23995 else
23996 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23997 }
23998 return;
23999
24000 case 'L':
24001 /* The low 16 bits of an immediate constant. */
24002 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24003 return;
24004
24005 case 'i':
24006 fprintf (stream, "%s", arithmetic_instr (x, 1));
24007 return;
24008
24009 case 'I':
24010 fprintf (stream, "%s", arithmetic_instr (x, 0));
24011 return;
24012
24013 case 'S':
24014 {
24015 HOST_WIDE_INT val;
24016 const char *shift;
24017
24018 shift = shift_op (x, &val);
24019
24020 if (shift)
24021 {
24022 fprintf (stream, ", %s ", shift);
24023 if (val == -1)
24024 arm_print_operand (stream, XEXP (x, 1), 0);
24025 else
24026 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24027 }
24028 }
24029 return;
24030
24031 /* An explanation of the 'Q', 'R' and 'H' register operands:
24032
24033 In a pair of registers containing a DI or DF value the 'Q'
24034 operand returns the register number of the register containing
24035 the least significant part of the value. The 'R' operand returns
24036 the register number of the register containing the most
24037 significant part of the value.
24038
24039 The 'H' operand returns the higher of the two register numbers.
24040 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24041 same as the 'Q' operand, since the most significant part of the
24042 value is held in the lower number register. The reverse is true
24043 on systems where WORDS_BIG_ENDIAN is false.
24044
24045 The purpose of these operands is to distinguish between cases
24046 where the endian-ness of the values is important (for example
24047 when they are added together), and cases where the endian-ness
24048 is irrelevant, but the order of register operations is important.
24049 For example when loading a value from memory into a register
24050 pair, the endian-ness does not matter. Provided that the value
24051 from the lower memory address is put into the lower numbered
24052 register, and the value from the higher address is put into the
24053 higher numbered register, the load will work regardless of whether
24054 the value being loaded is big-wordian or little-wordian. The
24055 order of the two register loads can matter however, if the address
24056 of the memory location is actually held in one of the registers
24057 being overwritten by the load.
24058
24059 The 'Q' and 'R' constraints are also available for 64-bit
24060 constants. */
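/* As a concrete example with a hypothetical register assignment, for a
   DImode value held in the pair {r4, r5}: with little-endian word order
   %Q prints r4, %R prints r5 and %H prints r5; when WORDS_BIG_ENDIAN,
   %Q prints r5, %R prints r4 and %H still prints r5.  */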
24061 case 'Q':
24062 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24063 {
24064 rtx part = gen_lowpart (SImode, x);
24065 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24066 return;
24067 }
24068
24069 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24070 {
24071 output_operand_lossage ("invalid operand for code '%c'", code);
24072 return;
24073 }
24074
24075 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24076 return;
24077
24078 case 'R':
24079 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24080 {
24081 machine_mode mode = GET_MODE (x);
24082 rtx part;
24083
24084 if (mode == VOIDmode)
24085 mode = DImode;
24086 part = gen_highpart_mode (SImode, mode, x);
24087 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24088 return;
24089 }
24090
24091 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24092 {
24093 output_operand_lossage ("invalid operand for code '%c'", code);
24094 return;
24095 }
24096
24097 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24098 return;
24099
24100 case 'H':
24101 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24102 {
24103 output_operand_lossage ("invalid operand for code '%c'", code);
24104 return;
24105 }
24106
24107 asm_fprintf (stream, "%r", REGNO (x) + 1);
24108 return;
24109
24110 case 'J':
24111 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24112 {
24113 output_operand_lossage ("invalid operand for code '%c'", code);
24114 return;
24115 }
24116
24117 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24118 return;
24119
24120 case 'K':
24121 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24122 {
24123 output_operand_lossage ("invalid operand for code '%c'", code);
24124 return;
24125 }
24126
24127 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24128 return;
24129
24130 case 'm':
24131 asm_fprintf (stream, "%r",
24132 REG_P (XEXP (x, 0))
24133 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24134 return;
24135
24136 case 'M':
24137 asm_fprintf (stream, "{%r-%r}",
24138 REGNO (x),
24139 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24140 return;
24141
24142 /* Like 'M', but writing doubleword vector registers, for use by Neon
24143 insns. */
24144 case 'h':
24145 {
24146 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24147 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24148 if (numregs == 1)
24149 asm_fprintf (stream, "{d%d}", regno);
24150 else
24151 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24152 }
24153 return;
24154
24155 case 'd':
24156 /* CONST_TRUE_RTX means always -- that's the default. */
24157 if (x == const_true_rtx)
24158 return;
24159
24160 if (!COMPARISON_P (x))
24161 {
24162 output_operand_lossage ("invalid operand for code '%c'", code);
24163 return;
24164 }
24165
24166 fputs (arm_condition_codes[get_arm_condition_code (x)],
24167 stream);
24168 return;
24169
24170 case 'D':
24171 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24172 want to do that. */
24173 if (x == const_true_rtx)
24174 {
24175 output_operand_lossage ("instruction never executed");
24176 return;
24177 }
24178 if (!COMPARISON_P (x))
24179 {
24180 output_operand_lossage ("invalid operand for code '%c'", code);
24181 return;
24182 }
24183
24184 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24185 (get_arm_condition_code (x))],
24186 stream);
24187 return;
24188
24189 case 'V':
24190 {
24191 /* Output the LSB (shift) and width for a bitmask instruction
24192 based on a literal mask. The LSB is printed first,
24193 followed by the width.
24194
24195 Eg. For 0b1...1110001, the result is #1, #3. */
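/* Working through that example: VAL = ~X has only bits 1-3 set, so
   VAL & -VAL == 0b10 and the lsb is 1, while VAL + (VAL & -VAL) == 0b10000,
   whose exact_log2 is 4, giving a width of 4 - 1 == 3.  */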
24196 if (!CONST_INT_P (x))
24197 {
24198 output_operand_lossage ("invalid operand for code '%c'", code);
24199 return;
24200 }
24201
24202 unsigned HOST_WIDE_INT val = ~XUINT (x, 0);
24203 int lsb = exact_log2 (val & -val);
24204 asm_fprintf (stream, "#%d, #%d", lsb,
24205 (exact_log2 (val + (val & -val)) - lsb));
24206 }
24207 return;
24208
24209 case 's':
24210 case 'W':
24211 case 'X':
24212 case 'Y':
24213 case 'Z':
24214 /* Former Maverick support, removed after GCC-4.7. */
24215 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24216 return;
24217
24218 case 'U':
24219 if (!REG_P (x)
24220 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24221 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24222 /* Bad value for wCG register number. */
24223 {
24224 output_operand_lossage ("invalid operand for code '%c'", code);
24225 return;
24226 }
24227
24228 else
24229 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24230 return;
24231
24232 /* Print an iWMMXt control register name. */
24233 case 'w':
24234 if (!CONST_INT_P (x)
24235 || INTVAL (x) < 0
24236 || INTVAL (x) >= 16)
24237 /* Bad value for wC register number. */
24238 {
24239 output_operand_lossage ("invalid operand for code '%c'", code);
24240 return;
24241 }
24242
24243 else
24244 {
24245 static const char * wc_reg_names [16] =
24246 {
24247 "wCID", "wCon", "wCSSF", "wCASF",
24248 "wC4", "wC5", "wC6", "wC7",
24249 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24250 "wC12", "wC13", "wC14", "wC15"
24251 };
24252
24253 fputs (wc_reg_names [INTVAL (x)], stream);
24254 }
24255 return;
24256
24257 /* Print the high single-precision register of a VFP double-precision
24258 register. */
24259 case 'p':
24260 {
24261 machine_mode mode = GET_MODE (x);
24262 int regno;
24263
24264 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24265 {
24266 output_operand_lossage ("invalid operand for code '%c'", code);
24267 return;
24268 }
24269
24270 regno = REGNO (x);
24271 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24272 {
24273 output_operand_lossage ("invalid operand for code '%c'", code);
24274 return;
24275 }
24276
24277 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24278 }
24279 return;
24280
24281 /* Print a VFP/Neon double precision or quad precision register name. */
24282 case 'P':
24283 case 'q':
24284 {
24285 machine_mode mode = GET_MODE (x);
24286 int is_quad = (code == 'q');
24287 int regno;
24288
24289 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24290 {
24291 output_operand_lossage ("invalid operand for code '%c'", code);
24292 return;
24293 }
24294
24295 if (!REG_P (x)
24296 || !IS_VFP_REGNUM (REGNO (x)))
24297 {
24298 output_operand_lossage ("invalid operand for code '%c'", code);
24299 return;
24300 }
24301
24302 regno = REGNO (x);
24303 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24304 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24305 {
24306 output_operand_lossage ("invalid operand for code '%c'", code);
24307 return;
24308 }
24309
24310 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24311 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24312 }
24313 return;
24314
24315 /* These two codes print the low/high doubleword register of a Neon quad
24316 register, respectively. For pair-structure types, can also print
24317 low/high quadword registers. */
24318 case 'e':
24319 case 'f':
24320 {
24321 machine_mode mode = GET_MODE (x);
24322 int regno;
24323
24324 if ((GET_MODE_SIZE (mode) != 16
24325 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24326 {
24327 output_operand_lossage ("invalid operand for code '%c'", code);
24328 return;
24329 }
24330
24331 regno = REGNO (x);
24332 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24333 {
24334 output_operand_lossage ("invalid operand for code '%c'", code);
24335 return;
24336 }
24337
24338 if (GET_MODE_SIZE (mode) == 16)
24339 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24340 + (code == 'f' ? 1 : 0));
24341 else
24342 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24343 + (code == 'f' ? 1 : 0));
24344 }
24345 return;
24346
24347 /* Print a VFPv3 floating-point constant, represented as an integer
24348 index. */
24349 case 'G':
24350 {
24351 int index = vfp3_const_double_index (x);
24352 gcc_assert (index != -1);
24353 fprintf (stream, "%d", index);
24354 }
24355 return;
24356
24357 /* Print bits representing opcode features for Neon.
24358
24359 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24360 and polynomials as unsigned.
24361
24362 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24363
24364 Bit 2 is 1 for rounding functions, 0 otherwise. */
24365
24366 /* Identify the type as 's', 'u', 'p' or 'f'. */
24367 case 'T':
24368 {
24369 HOST_WIDE_INT bits = INTVAL (x);
24370 fputc ("uspf"[bits & 3], stream);
24371 }
24372 return;
24373
24374 /* Likewise, but signed and unsigned integers are both 'i'. */
24375 case 'F':
24376 {
24377 HOST_WIDE_INT bits = INTVAL (x);
24378 fputc ("iipf"[bits & 3], stream);
24379 }
24380 return;
24381
24382 /* As for 'T', but emit 'u' instead of 'p'. */
24383 case 't':
24384 {
24385 HOST_WIDE_INT bits = INTVAL (x);
24386 fputc ("usuf"[bits & 3], stream);
24387 }
24388 return;
24389
24390 /* Bit 2: rounding (vs none). */
24391 case 'O':
24392 {
24393 HOST_WIDE_INT bits = INTVAL (x);
24394 fputs ((bits & 4) != 0 ? "r" : "", stream);
24395 }
24396 return;
24397
24398 /* Memory operand for vld1/vst1 instruction. */
24399 case 'A':
24400 {
24401 rtx addr;
24402 bool postinc = FALSE;
24403 rtx postinc_reg = NULL;
24404 unsigned align, memsize, align_bits;
24405
24406 gcc_assert (MEM_P (x));
24407 addr = XEXP (x, 0);
24408 if (GET_CODE (addr) == POST_INC)
24409 {
24410 postinc = 1;
24411 addr = XEXP (addr, 0);
24412 }
24413 if (GET_CODE (addr) == POST_MODIFY)
24414 {
24415 postinc_reg = XEXP( XEXP (addr, 1), 1);
24416 addr = XEXP (addr, 0);
24417 }
24418 asm_fprintf (stream, "[%r", REGNO (addr));
24419
24420 /* We know the alignment of this access, so we can emit a hint in the
24421 instruction (for some alignments) as an aid to the memory subsystem
24422 of the target. */
24423 align = MEM_ALIGN (x) >> 3;
24424 memsize = MEM_SIZE (x);
24425
24426 /* Only certain alignment specifiers are supported by the hardware. */
24427 if (memsize == 32 && (align % 32) == 0)
24428 align_bits = 256;
24429 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24430 align_bits = 128;
24431 else if (memsize >= 8 && (align % 8) == 0)
24432 align_bits = 64;
24433 else
24434 align_bits = 0;
24435
24436 if (align_bits != 0)
24437 asm_fprintf (stream, ":%d", align_bits);
24438
24439 asm_fprintf (stream, "]");
24440
24441 if (postinc)
24442 fputs("!", stream);
24443 if (postinc_reg)
24444 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24445 }
24446 return;
24447
24448 /* Print a memory operand with an "Ux" or "Uj" constraint. Depending on the
24449 rtx_code of the address, the output takes one of the following forms:
24450 1. [Rn], #+/-<imm>
24451 2. [Rn, #+/-<imm>]!
24452 3. [Rn, #+/-<imm>]
24453 4. [Rn]. */
24454 case 'E':
24455 {
24456 rtx addr;
24457 rtx postinc_reg = NULL;
24458 unsigned inc_val = 0;
24459 enum rtx_code code;
24460
24461 gcc_assert (MEM_P (x));
24462 addr = XEXP (x, 0);
24463 code = GET_CODE (addr);
24464 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24465 || code == PRE_DEC)
24466 {
24467 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24468 inc_val = GET_MODE_SIZE (GET_MODE (x));
24469 if (code == POST_INC || code == POST_DEC)
24470 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24471 ? "": "-", inc_val);
24472 else
24473 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24474 ? "": "-", inc_val);
24475 }
24476 else if (code == POST_MODIFY || code == PRE_MODIFY)
24477 {
24478 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24479 postinc_reg = XEXP (XEXP (addr, 1), 1);
24480 if (postinc_reg && CONST_INT_P (postinc_reg))
24481 {
24482 if (code == POST_MODIFY)
24483 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24484 else
24485 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24486 }
24487 }
24488 else if (code == PLUS)
24489 {
24490 rtx base = XEXP (addr, 0);
24491 rtx index = XEXP (addr, 1);
24492
24493 gcc_assert (REG_P (base) && CONST_INT_P (index));
24494
24495 HOST_WIDE_INT offset = INTVAL (index);
24496 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24497 }
24498 else
24499 {
24500 gcc_assert (REG_P (addr));
24501 asm_fprintf (stream, "[%r]",REGNO (addr));
24502 }
24503 }
24504 return;
24505
24506 case 'C':
24507 {
24508 rtx addr;
24509
24510 gcc_assert (MEM_P (x));
24511 addr = XEXP (x, 0);
24512 gcc_assert (REG_P (addr));
24513 asm_fprintf (stream, "[%r]", REGNO (addr));
24514 }
24515 return;
24516
24517 /* Translate an S register number into a D register number and element index. */
24518 case 'y':
24519 {
24520 machine_mode mode = GET_MODE (x);
24521 int regno;
24522
24523 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24524 {
24525 output_operand_lossage ("invalid operand for code '%c'", code);
24526 return;
24527 }
24528
24529 regno = REGNO (x);
24530 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24531 {
24532 output_operand_lossage ("invalid operand for code '%c'", code);
24533 return;
24534 }
24535
24536 regno = regno - FIRST_VFP_REGNUM;
24537 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24538 }
24539 return;
24540
24541 case 'v':
24542 gcc_assert (CONST_DOUBLE_P (x));
24543 int result;
24544 result = vfp3_const_double_for_fract_bits (x);
24545 if (result == 0)
24546 result = vfp3_const_double_for_bits (x);
24547 fprintf (stream, "#%d", result);
24548 return;
24549
24550 /* Register specifier for vld1.16/vst1.16. Translate the S register
24551 number into a D register number and element index. */
24552 case 'z':
24553 {
24554 machine_mode mode = GET_MODE (x);
24555 int regno;
24556
24557 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24558 {
24559 output_operand_lossage ("invalid operand for code '%c'", code);
24560 return;
24561 }
24562
24563 regno = REGNO (x);
24564 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24565 {
24566 output_operand_lossage ("invalid operand for code '%c'", code);
24567 return;
24568 }
24569
24570 regno = regno - FIRST_VFP_REGNUM;
24571 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24572 }
24573 return;
24574
24575 default:
24576 if (x == 0)
24577 {
24578 output_operand_lossage ("missing operand");
24579 return;
24580 }
24581
24582 switch (GET_CODE (x))
24583 {
24584 case REG:
24585 asm_fprintf (stream, "%r", REGNO (x));
24586 break;
24587
24588 case MEM:
24589 output_address (GET_MODE (x), XEXP (x, 0));
24590 break;
24591
24592 case CONST_DOUBLE:
24593 {
24594 char fpstr[20];
24595 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24596 sizeof (fpstr), 0, 1);
24597 fprintf (stream, "#%s", fpstr);
24598 }
24599 break;
24600
24601 default:
24602 gcc_assert (GET_CODE (x) != NEG);
24603 fputc ('#', stream);
24604 if (GET_CODE (x) == HIGH)
24605 {
24606 fputs (":lower16:", stream);
24607 x = XEXP (x, 0);
24608 }
24609
24610 output_addr_const (stream, x);
24611 break;
24612 }
24613 }
24614 }
24615 \f
24616 /* Target hook for printing a memory address. */
24617 static void
24618 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24619 {
24620 if (TARGET_32BIT)
24621 {
24622 int is_minus = GET_CODE (x) == MINUS;
24623
24624 if (REG_P (x))
24625 asm_fprintf (stream, "[%r]", REGNO (x));
24626 else if (GET_CODE (x) == PLUS || is_minus)
24627 {
24628 rtx base = XEXP (x, 0);
24629 rtx index = XEXP (x, 1);
24630 HOST_WIDE_INT offset = 0;
24631 if (!REG_P (base)
24632 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24633 {
24634 /* Ensure that BASE is a register (one of
24635 them must be), and that SP is not used
24636 as an index register. */
24637 std::swap (base, index);
24638 }
24639 switch (GET_CODE (index))
24640 {
24641 case CONST_INT:
24642 offset = INTVAL (index);
24643 if (is_minus)
24644 offset = -offset;
24645 asm_fprintf (stream, "[%r, #%wd]",
24646 REGNO (base), offset);
24647 break;
24648
24649 case REG:
24650 asm_fprintf (stream, "[%r, %s%r]",
24651 REGNO (base), is_minus ? "-" : "",
24652 REGNO (index));
24653 break;
24654
24655 case MULT:
24656 case ASHIFTRT:
24657 case LSHIFTRT:
24658 case ASHIFT:
24659 case ROTATERT:
24660 {
24661 asm_fprintf (stream, "[%r, %s%r",
24662 REGNO (base), is_minus ? "-" : "",
24663 REGNO (XEXP (index, 0)));
24664 arm_print_operand (stream, index, 'S');
24665 fputs ("]", stream);
24666 break;
24667 }
24668
24669 default:
24670 gcc_unreachable ();
24671 }
24672 }
24673 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24674 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24675 {
24676 gcc_assert (REG_P (XEXP (x, 0)));
24677
24678 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24679 asm_fprintf (stream, "[%r, #%s%d]!",
24680 REGNO (XEXP (x, 0)),
24681 GET_CODE (x) == PRE_DEC ? "-" : "",
24682 GET_MODE_SIZE (mode));
24683 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24684 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24685 else
24686 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24687 GET_CODE (x) == POST_DEC ? "-" : "",
24688 GET_MODE_SIZE (mode));
24689 }
24690 else if (GET_CODE (x) == PRE_MODIFY)
24691 {
24692 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24693 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24694 asm_fprintf (stream, "#%wd]!",
24695 INTVAL (XEXP (XEXP (x, 1), 1)));
24696 else
24697 asm_fprintf (stream, "%r]!",
24698 REGNO (XEXP (XEXP (x, 1), 1)));
24699 }
24700 else if (GET_CODE (x) == POST_MODIFY)
24701 {
24702 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24703 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24704 asm_fprintf (stream, "#%wd",
24705 INTVAL (XEXP (XEXP (x, 1), 1)));
24706 else
24707 asm_fprintf (stream, "%r",
24708 REGNO (XEXP (XEXP (x, 1), 1)));
24709 }
24710 else output_addr_const (stream, x);
24711 }
24712 else
24713 {
24714 if (REG_P (x))
24715 asm_fprintf (stream, "[%r]", REGNO (x));
24716 else if (GET_CODE (x) == POST_INC)
24717 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24718 else if (GET_CODE (x) == PLUS)
24719 {
24720 gcc_assert (REG_P (XEXP (x, 0)));
24721 if (CONST_INT_P (XEXP (x, 1)))
24722 asm_fprintf (stream, "[%r, #%wd]",
24723 REGNO (XEXP (x, 0)),
24724 INTVAL (XEXP (x, 1)));
24725 else
24726 asm_fprintf (stream, "[%r, %r]",
24727 REGNO (XEXP (x, 0)),
24728 REGNO (XEXP (x, 1)));
24729 }
24730 else
24731 output_addr_const (stream, x);
24732 }
24733 }
24734 \f
24735 /* Target hook for indicating whether a punctuation character for
24736 TARGET_PRINT_OPERAND is valid. */
24737 static bool
24738 arm_print_operand_punct_valid_p (unsigned char code)
24739 {
24740 return (code == '@' || code == '|' || code == '.'
24741 || code == '(' || code == ')' || code == '#'
24742 || (TARGET_32BIT && (code == '?'))
24743 || (TARGET_THUMB2 && (code == '!'))
24744 || (TARGET_THUMB && (code == '_')));
24745 }
24746 \f
24747 /* Target hook for assembling integer objects. The ARM version needs to
24748 handle word-sized values specially. */
24749 static bool
24750 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24751 {
24752 machine_mode mode;
24753
24754 if (size == UNITS_PER_WORD && aligned_p)
24755 {
24756 fputs ("\t.word\t", asm_out_file);
24757 output_addr_const (asm_out_file, x);
24758
24759 /* Mark symbols as position independent. We only do this in the
24760 .text segment, not in the .data segment. */
24761 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24762 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24763 {
24764 /* See legitimize_pic_address for an explanation of the
24765 TARGET_VXWORKS_RTP check. */
24766 /* References to weak symbols cannot be resolved locally:
24767 they may be overridden by a non-weak definition at link
24768 time. */
24769 if (!arm_pic_data_is_text_relative
24770 || (SYMBOL_REF_P (x)
24771 && (!SYMBOL_REF_LOCAL_P (x)
24772 || (SYMBOL_REF_DECL (x)
24773 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24774 || (SYMBOL_REF_FUNCTION_P (x)
24775 && !arm_fdpic_local_funcdesc_p (x)))))
24776 {
24777 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24778 fputs ("(GOTFUNCDESC)", asm_out_file);
24779 else
24780 fputs ("(GOT)", asm_out_file);
24781 }
24782 else
24783 {
24784 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24785 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24786 else
24787 {
24788 bool is_readonly;
24789
24790 if (!TARGET_FDPIC
24791 || arm_is_segment_info_known (x, &is_readonly))
24792 fputs ("(GOTOFF)", asm_out_file);
24793 else
24794 fputs ("(GOT)", asm_out_file);
24795 }
24796 }
24797 }
24798
24799 /* For FDPIC we also have to mark symbol for .data section. */
24800 if (TARGET_FDPIC
24801 && !making_const_table
24802 && SYMBOL_REF_P (x)
24803 && SYMBOL_REF_FUNCTION_P (x))
24804 fputs ("(FUNCDESC)", asm_out_file);
24805
24806 fputc ('\n', asm_out_file);
24807 return true;
24808 }
24809
24810 mode = GET_MODE (x);
24811
24812 if (arm_vector_mode_supported_p (mode))
24813 {
24814 int i, units;
24815
24816 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24817
24818 units = CONST_VECTOR_NUNITS (x);
24819 size = GET_MODE_UNIT_SIZE (mode);
24820
24821 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24822 for (i = 0; i < units; i++)
24823 {
24824 rtx elt = CONST_VECTOR_ELT (x, i);
24825 assemble_integer
24826 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24827 }
24828 else
24829 for (i = 0; i < units; i++)
24830 {
24831 rtx elt = CONST_VECTOR_ELT (x, i);
24832 assemble_real
24833 (*CONST_DOUBLE_REAL_VALUE (elt),
24834 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24835 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24836 }
24837
24838 return true;
24839 }
24840
24841 return default_assemble_integer (x, size, aligned_p);
24842 }
24843
24844 static void
24845 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24846 {
24847 section *s;
24848
24849 if (!TARGET_AAPCS_BASED)
24850 {
24851 (is_ctor ?
24852 default_named_section_asm_out_constructor
24853 : default_named_section_asm_out_destructor) (symbol, priority);
24854 return;
24855 }
24856
24857 /* Put these in the .init_array section, using a special relocation. */
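/* For example, a constructor with priority 101 ends up in a section named
   ".init_array.00101"; the "%.5u" format below zero-pads the priority.  */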
24858 if (priority != DEFAULT_INIT_PRIORITY)
24859 {
24860 char buf[18];
24861 sprintf (buf, "%s.%.5u",
24862 is_ctor ? ".init_array" : ".fini_array",
24863 priority);
24864 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24865 }
24866 else if (is_ctor)
24867 s = ctors_section;
24868 else
24869 s = dtors_section;
24870
24871 switch_to_section (s);
24872 assemble_align (POINTER_SIZE);
24873 fputs ("\t.word\t", asm_out_file);
24874 output_addr_const (asm_out_file, symbol);
24875 fputs ("(target1)\n", asm_out_file);
24876 }
24877
24878 /* Add a function to the list of static constructors. */
24879
24880 static void
24881 arm_elf_asm_constructor (rtx symbol, int priority)
24882 {
24883 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24884 }
24885
24886 /* Add a function to the list of static destructors. */
24887
24888 static void
24889 arm_elf_asm_destructor (rtx symbol, int priority)
24890 {
24891 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24892 }
24893 \f
24894 /* A finite state machine takes care of noticing whether or not instructions
24895 can be conditionally executed, and thus decreases execution time and code
24896 size by deleting branch instructions. The fsm is controlled by
24897 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24898
24899 /* The states of the fsm controlling condition codes are:
24900 0: normal, do nothing special
24901 1: make ASM_OUTPUT_OPCODE not output this instruction
24902 2: make ASM_OUTPUT_OPCODE not output this instruction
24903 3: make instructions conditional
24904 4: make instructions conditional
24905
24906 State transitions (state->state by whom under condition):
24907 0 -> 1 final_prescan_insn if the `target' is a label
24908 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24909 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24910 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24911 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24912 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24913 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24914 (the target insn is arm_target_insn).
24915
24916 If the jump clobbers the conditions then we use states 2 and 4.
24917
24918 A similar thing can be done with conditional return insns.
24919
24920 XXX In case the `target' is an unconditional branch, this conditionalising
24921 of the instructions always reduces code size, but not always execution
24922 time. But then, I want to reduce the code size to somewhere near what
24923 /bin/cc produces. */
24924
24925 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24926 instructions. When a COND_EXEC instruction is seen the subsequent
24927 instructions are scanned so that multiple conditional instructions can be
24928 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24929 specify the length and true/false mask for the IT block. These will be
24930 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
24931
24932 /* Returns the index of the ARM condition code string in
24933 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24934 COMPARISON should be an rtx like `(eq (...) (...))'. */
24935
24936 enum arm_cond_code
24937 maybe_get_arm_condition_code (rtx comparison)
24938 {
24939 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24940 enum arm_cond_code code;
24941 enum rtx_code comp_code = GET_CODE (comparison);
24942
24943 if (GET_MODE_CLASS (mode) != MODE_CC)
24944 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24945 XEXP (comparison, 1));
24946
24947 switch (mode)
24948 {
24949 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24950 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24951 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24952 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24953 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24954 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24955 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24956 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24957 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24958 case E_CC_DLTUmode: code = ARM_CC;
24959
24960 dominance:
24961 if (comp_code == EQ)
24962 return ARM_INVERSE_CONDITION_CODE (code);
24963 if (comp_code == NE)
24964 return code;
24965 return ARM_NV;
24966
24967 case E_CC_NZmode:
24968 switch (comp_code)
24969 {
24970 case NE: return ARM_NE;
24971 case EQ: return ARM_EQ;
24972 case GE: return ARM_PL;
24973 case LT: return ARM_MI;
24974 default: return ARM_NV;
24975 }
24976
24977 case E_CC_Zmode:
24978 switch (comp_code)
24979 {
24980 case NE: return ARM_NE;
24981 case EQ: return ARM_EQ;
24982 default: return ARM_NV;
24983 }
24984
24985 case E_CC_Nmode:
24986 switch (comp_code)
24987 {
24988 case NE: return ARM_MI;
24989 case EQ: return ARM_PL;
24990 default: return ARM_NV;
24991 }
24992
24993 case E_CCFPEmode:
24994 case E_CCFPmode:
24995 /* We can handle all cases except UNEQ and LTGT. */
24996 switch (comp_code)
24997 {
24998 case GE: return ARM_GE;
24999 case GT: return ARM_GT;
25000 case LE: return ARM_LS;
25001 case LT: return ARM_MI;
25002 case NE: return ARM_NE;
25003 case EQ: return ARM_EQ;
25004 case ORDERED: return ARM_VC;
25005 case UNORDERED: return ARM_VS;
25006 case UNLT: return ARM_LT;
25007 case UNLE: return ARM_LE;
25008 case UNGT: return ARM_HI;
25009 case UNGE: return ARM_PL;
25010 /* UNEQ and LTGT do not have a representation. */
25011 case UNEQ: /* Fall through. */
25012 case LTGT: /* Fall through. */
25013 default: return ARM_NV;
25014 }
25015
25016 case E_CC_SWPmode:
25017 switch (comp_code)
25018 {
25019 case NE: return ARM_NE;
25020 case EQ: return ARM_EQ;
25021 case GE: return ARM_LE;
25022 case GT: return ARM_LT;
25023 case LE: return ARM_GE;
25024 case LT: return ARM_GT;
25025 case GEU: return ARM_LS;
25026 case GTU: return ARM_CC;
25027 case LEU: return ARM_CS;
25028 case LTU: return ARM_HI;
25029 default: return ARM_NV;
25030 }
25031
25032 case E_CC_Cmode:
25033 switch (comp_code)
25034 {
25035 case LTU: return ARM_CS;
25036 case GEU: return ARM_CC;
25037 default: return ARM_NV;
25038 }
25039
25040 case E_CC_NVmode:
25041 switch (comp_code)
25042 {
25043 case GE: return ARM_GE;
25044 case LT: return ARM_LT;
25045 default: return ARM_NV;
25046 }
25047
25048 case E_CC_Bmode:
25049 switch (comp_code)
25050 {
25051 case GEU: return ARM_CS;
25052 case LTU: return ARM_CC;
25053 default: return ARM_NV;
25054 }
25055
25056 case E_CC_Vmode:
25057 switch (comp_code)
25058 {
25059 case NE: return ARM_VS;
25060 case EQ: return ARM_VC;
25061 default: return ARM_NV;
25062 }
25063
25064 case E_CC_ADCmode:
25065 switch (comp_code)
25066 {
25067 case GEU: return ARM_CS;
25068 case LTU: return ARM_CC;
25069 default: return ARM_NV;
25070 }
25071
25072 case E_CCmode:
25073 case E_CC_RSBmode:
25074 switch (comp_code)
25075 {
25076 case NE: return ARM_NE;
25077 case EQ: return ARM_EQ;
25078 case GE: return ARM_GE;
25079 case GT: return ARM_GT;
25080 case LE: return ARM_LE;
25081 case LT: return ARM_LT;
25082 case GEU: return ARM_CS;
25083 case GTU: return ARM_HI;
25084 case LEU: return ARM_LS;
25085 case LTU: return ARM_CC;
25086 default: return ARM_NV;
25087 }
25088
25089 default: gcc_unreachable ();
25090 }
25091 }
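/* [Editorial note, not part of GCC.]  Worked example of the table above: in
   E_CC_SWPmode the flags were set by comparing the operands in swapped
   order, so a GT query must be answered with the mirrored condition, hence
   GT -> ARM_LT (and likewise GTU -> ARM_CC).  The dominance modes
   (E_CC_Dxxmode) only answer EQ/NE queries: NE returns the stored condition
   and EQ returns its inverse.  */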
25092
25093 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25094 static enum arm_cond_code
25095 get_arm_condition_code (rtx comparison)
25096 {
25097 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25098 gcc_assert (code != ARM_NV);
25099 return code;
25100 }
25101
25102 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25103 code registers when not targeting Thumb1. The VFP condition register
25104 only exists when generating hard-float code. */
25105 static bool
25106 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25107 {
25108 if (!TARGET_32BIT)
25109 return false;
25110
25111 *p1 = CC_REGNUM;
25112 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25113 return true;
25114 }
25115
25116 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25117 instructions. */
25118 void
25119 thumb2_final_prescan_insn (rtx_insn *insn)
25120 {
25121 rtx_insn *first_insn = insn;
25122 rtx body = PATTERN (insn);
25123 rtx predicate;
25124 enum arm_cond_code code;
25125 int n;
25126 int mask;
25127 int max;
25128
25129 /* max_insns_skipped in the tune was already taken into account in the
25130 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25131 stage just emit IT blocks as large as we can; it does not make sense
25132 to split them. */
25133 max = MAX_INSN_PER_IT_BLOCK;
25134
25135 /* Remove the previous insn from the count of insns to be output. */
25136 if (arm_condexec_count)
25137 arm_condexec_count--;
25138
25139 /* Nothing to do if we are already inside a conditional block. */
25140 if (arm_condexec_count)
25141 return;
25142
25143 if (GET_CODE (body) != COND_EXEC)
25144 return;
25145
25146 /* Conditional jumps are implemented directly. */
25147 if (JUMP_P (insn))
25148 return;
25149
25150 predicate = COND_EXEC_TEST (body);
25151 arm_current_cc = get_arm_condition_code (predicate);
25152
25153 n = get_attr_ce_count (insn);
25154 arm_condexec_count = 1;
25155 arm_condexec_mask = (1 << n) - 1;
25156 arm_condexec_masklen = n;
25157 /* See if subsequent instructions can be combined into the same block. */
25158 for (;;)
25159 {
25160 insn = next_nonnote_insn (insn);
25161
25162 /* Jumping into the middle of an IT block is illegal, so a label or
25163 barrier terminates the block. */
25164 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25165 break;
25166
25167 body = PATTERN (insn);
25168 /* USE and CLOBBER aren't really insns, so just skip them. */
25169 if (GET_CODE (body) == USE
25170 || GET_CODE (body) == CLOBBER)
25171 continue;
25172
25173 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25174 if (GET_CODE (body) != COND_EXEC)
25175 break;
25176 /* Maximum number of conditionally executed instructions in a block. */
25177 n = get_attr_ce_count (insn);
25178 if (arm_condexec_masklen + n > max)
25179 break;
25180
25181 predicate = COND_EXEC_TEST (body);
25182 code = get_arm_condition_code (predicate);
25183 mask = (1 << n) - 1;
25184 if (arm_current_cc == code)
25185 arm_condexec_mask |= (mask << arm_condexec_masklen);
25186 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25187 break;
25188
25189 arm_condexec_count++;
25190 arm_condexec_masklen += n;
25191
25192 /* A jump must be the last instruction in a conditional block. */
25193 if (JUMP_P (insn))
25194 break;
25195 }
25196 /* Restore recog_data (getting the attributes of other insns can
25197 destroy this array, but final.cc assumes that it remains intact
25198 across this call). */
25199 extract_constrain_insn_cached (first_insn);
25200 }
25201
25202 void
25203 arm_final_prescan_insn (rtx_insn *insn)
25204 {
25205 /* BODY will hold the body of INSN. */
25206 rtx body = PATTERN (insn);
25207
25208 /* This will be 1 if trying to repeat the trick, and things need to be
25209 reversed if it appears to fail. */
25210 int reverse = 0;
25211
25212 /* If we start with a return insn, we only succeed if we find another one. */
25213 int seeking_return = 0;
25214 enum rtx_code return_code = UNKNOWN;
25215
25216 /* START_INSN will hold the insn from where we start looking. This is the
25217 first insn after the following code_label if REVERSE is true. */
25218 rtx_insn *start_insn = insn;
25219
25220 /* If in state 4, check if the target branch is reached, in order to
25221 change back to state 0. */
25222 if (arm_ccfsm_state == 4)
25223 {
25224 if (insn == arm_target_insn)
25225 {
25226 arm_target_insn = NULL;
25227 arm_ccfsm_state = 0;
25228 }
25229 return;
25230 }
25231
25232 /* If in state 3, it is possible to repeat the trick, if this insn is an
25233 unconditional branch to a label, and immediately following this branch
25234 is the previous target label which is only used once, and the label this
25235 branch jumps to is not too far off. */
25236 if (arm_ccfsm_state == 3)
25237 {
25238 if (simplejump_p (insn))
25239 {
25240 start_insn = next_nonnote_insn (start_insn);
25241 if (BARRIER_P (start_insn))
25242 {
25243 /* XXX Isn't this always a barrier? */
25244 start_insn = next_nonnote_insn (start_insn);
25245 }
25246 if (LABEL_P (start_insn)
25247 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25248 && LABEL_NUSES (start_insn) == 1)
25249 reverse = TRUE;
25250 else
25251 return;
25252 }
25253 else if (ANY_RETURN_P (body))
25254 {
25255 start_insn = next_nonnote_insn (start_insn);
25256 if (BARRIER_P (start_insn))
25257 start_insn = next_nonnote_insn (start_insn);
25258 if (LABEL_P (start_insn)
25259 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25260 && LABEL_NUSES (start_insn) == 1)
25261 {
25262 reverse = TRUE;
25263 seeking_return = 1;
25264 return_code = GET_CODE (body);
25265 }
25266 else
25267 return;
25268 }
25269 else
25270 return;
25271 }
25272
25273 gcc_assert (!arm_ccfsm_state || reverse);
25274 if (!JUMP_P (insn))
25275 return;
25276
25277 /* This jump might be in a PARALLEL with a clobber of the condition codes;
25278 the jump should always come first. */
25279 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25280 body = XVECEXP (body, 0, 0);
25281
25282 if (reverse
25283 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25284 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25285 {
25286 int insns_skipped;
25287 int fail = FALSE, succeed = FALSE;
25288 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25289 int then_not_else = TRUE;
25290 rtx_insn *this_insn = start_insn;
25291 rtx label = 0;
25292
25293 /* Register the insn jumped to. */
25294 if (reverse)
25295 {
25296 if (!seeking_return)
25297 label = XEXP (SET_SRC (body), 0);
25298 }
25299 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25300 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25301 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25302 {
25303 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25304 then_not_else = FALSE;
25305 }
25306 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25307 {
25308 seeking_return = 1;
25309 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25310 }
25311 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25312 {
25313 seeking_return = 1;
25314 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25315 then_not_else = FALSE;
25316 }
25317 else
25318 gcc_unreachable ();
25319
25320 /* See how many insns this branch skips, and what kind of insns. If all
25321 insns are okay, and the label or unconditional branch to the same
25322 label is not too far away, succeed. */
25323 for (insns_skipped = 0;
25324 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25325 {
25326 rtx scanbody;
25327
25328 this_insn = next_nonnote_insn (this_insn);
25329 if (!this_insn)
25330 break;
25331
25332 switch (GET_CODE (this_insn))
25333 {
25334 case CODE_LABEL:
25335 /* Succeed if it is the target label, otherwise fail since
25336 control falls in from somewhere else. */
25337 if (this_insn == label)
25338 {
25339 arm_ccfsm_state = 1;
25340 succeed = TRUE;
25341 }
25342 else
25343 fail = TRUE;
25344 break;
25345
25346 case BARRIER:
25347 /* Succeed if the following insn is the target label.
25348 Otherwise fail.
25349 If return insns are used then the last insn in a function
25350 will be a barrier. */
25351 this_insn = next_nonnote_insn (this_insn);
25352 if (this_insn && this_insn == label)
25353 {
25354 arm_ccfsm_state = 1;
25355 succeed = TRUE;
25356 }
25357 else
25358 fail = TRUE;
25359 break;
25360
25361 case CALL_INSN:
25362 /* The AAPCS says that conditional calls should not be
25363 used since they make interworking inefficient (the
25364 linker can't transform BL<cond> into BLX). That's
25365 only a problem if the machine has BLX. */
25366 if (arm_arch5t)
25367 {
25368 fail = TRUE;
25369 break;
25370 }
25371
25372 /* Succeed if the following insn is the target label, or
25373 if the following two insns are a barrier and the
25374 target label. */
25375 this_insn = next_nonnote_insn (this_insn);
25376 if (this_insn && BARRIER_P (this_insn))
25377 this_insn = next_nonnote_insn (this_insn);
25378
25379 if (this_insn && this_insn == label
25380 && insns_skipped < max_insns_skipped)
25381 {
25382 arm_ccfsm_state = 1;
25383 succeed = TRUE;
25384 }
25385 else
25386 fail = TRUE;
25387 break;
25388
25389 case JUMP_INSN:
25390 /* If this is an unconditional branch to the same label, succeed.
25391 If it is to another label, do nothing. If it is conditional,
25392 fail. */
25393 /* XXX Probably, the tests for SET and the PC are
25394 unnecessary. */
25395
25396 scanbody = PATTERN (this_insn);
25397 if (GET_CODE (scanbody) == SET
25398 && GET_CODE (SET_DEST (scanbody)) == PC)
25399 {
25400 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25401 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25402 {
25403 arm_ccfsm_state = 2;
25404 succeed = TRUE;
25405 }
25406 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25407 fail = TRUE;
25408 }
25409 /* Fail if a conditional return is undesirable (e.g. on a
25410 StrongARM), but still allow this if optimizing for size. */
25411 else if (GET_CODE (scanbody) == return_code
25412 && !use_return_insn (TRUE, NULL)
25413 && !optimize_size)
25414 fail = TRUE;
25415 else if (GET_CODE (scanbody) == return_code)
25416 {
25417 arm_ccfsm_state = 2;
25418 succeed = TRUE;
25419 }
25420 else if (GET_CODE (scanbody) == PARALLEL)
25421 {
25422 switch (get_attr_conds (this_insn))
25423 {
25424 case CONDS_NOCOND:
25425 break;
25426 default:
25427 fail = TRUE;
25428 break;
25429 }
25430 }
25431 else
25432 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25433
25434 break;
25435
25436 case INSN:
25437 /* Instructions using or affecting the condition codes make it
25438 fail. */
25439 scanbody = PATTERN (this_insn);
25440 if (!(GET_CODE (scanbody) == SET
25441 || GET_CODE (scanbody) == PARALLEL)
25442 || get_attr_conds (this_insn) != CONDS_NOCOND)
25443 fail = TRUE;
25444 break;
25445
25446 default:
25447 break;
25448 }
25449 }
25450 if (succeed)
25451 {
25452 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25453 arm_target_label = CODE_LABEL_NUMBER (label);
25454 else
25455 {
25456 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25457
25458 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25459 {
25460 this_insn = next_nonnote_insn (this_insn);
25461 gcc_assert (!this_insn
25462 || (!BARRIER_P (this_insn)
25463 && !LABEL_P (this_insn)));
25464 }
25465 if (!this_insn)
25466 {
25467 /* Oh, dear! We ran off the end; give up. */
25468 extract_constrain_insn_cached (insn);
25469 arm_ccfsm_state = 0;
25470 arm_target_insn = NULL;
25471 return;
25472 }
25473 arm_target_insn = this_insn;
25474 }
25475
25476 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25477 what it was. */
25478 if (!reverse)
25479 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25480
25481 if (reverse || then_not_else)
25482 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25483 }
25484
25485 /* Restore recog_data (getting the attributes of other insns can
25486 destroy this array, but final.cc assumes that it remains intact
25487 across this call). */
25488 extract_constrain_insn_cached (insn);
25489 }
25490 }
25491
25492 /* Output IT instructions. */
25493 void
25494 thumb2_asm_output_opcode (FILE * stream)
25495 {
25496 char buff[5];
25497 int n;
25498
25499 if (arm_condexec_mask)
25500 {
25501 for (n = 0; n < arm_condexec_masklen; n++)
25502 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25503 buff[n] = 0;
25504 asm_fprintf (stream, "i%s\t%s\n\t", buff,
25505 arm_condition_codes[arm_current_cc]);
25506 arm_condexec_mask = 0;
25507 }
25508 }
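/* [Editorial illustration, not part of GCC.]  Example of the mask encoding,
   assuming thumb2_final_prescan_insn built a block of three insns with
   arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101.  The loop above produces buff = "tet", so the
   prefix printed is

       itet    eq

   and the three following insns are output with the eq, ne and eq
   conditions respectively.  */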
25509
25510 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25511 UNITS_PER_WORD bytes wide. */
25512 static unsigned int
25513 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25514 {
25515 if (IS_VPR_REGNUM (regno))
25516 return CEIL (GET_MODE_SIZE (mode), 2);
25517
25518 if (TARGET_32BIT
25519 && regno > PC_REGNUM
25520 && regno != FRAME_POINTER_REGNUM
25521 && regno != ARG_POINTER_REGNUM
25522 && !IS_VFP_REGNUM (regno))
25523 return 1;
25524
25525 return ARM_NUM_REGS (mode);
25526 }
25527
25528 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25529 static bool
25530 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25531 {
25532 if (GET_MODE_CLASS (mode) == MODE_CC)
25533 return (regno == CC_REGNUM
25534 || (TARGET_VFP_BASE
25535 && regno == VFPCC_REGNUM));
25536
25537 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25538 return false;
25539
25540 if (IS_VPR_REGNUM (regno))
25541 return mode == HImode
25542 || mode == V16BImode
25543 || mode == V8BImode
25544 || mode == V4BImode;
25545
25546 if (TARGET_THUMB1)
25547 /* For the Thumb we only allow values bigger than SImode in
25548 registers 0 - 6, so that there is always a second low
25549 register available to hold the upper part of the value.
25550 We probably ought to ensure that the register is the
25551 start of an even numbered register pair. */
25552 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25553
25554 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25555 {
25556 if (mode == DFmode || mode == DImode)
25557 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25558
25559 if (mode == HFmode || mode == BFmode || mode == HImode
25560 || mode == SFmode || mode == SImode)
25561 return VFP_REGNO_OK_FOR_SINGLE (regno);
25562
25563 if (TARGET_NEON)
25564 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25565 || (VALID_NEON_QREG_MODE (mode)
25566 && NEON_REGNO_OK_FOR_QUAD (regno))
25567 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25568 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25569 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25570 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25571 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25572 if (TARGET_HAVE_MVE)
25573 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25574 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25575 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25576
25577 return false;
25578 }
25579
25580 if (TARGET_REALLY_IWMMXT)
25581 {
25582 if (IS_IWMMXT_GR_REGNUM (regno))
25583 return mode == SImode;
25584
25585 if (IS_IWMMXT_REGNUM (regno))
25586 return VALID_IWMMXT_REG_MODE (mode);
25587 }
25588
25589 /* We allow almost any value to be stored in the general registers.
25590 Restrict doubleword quantities to even register pairs in ARM state
25591 so that we can use ldrd. The same restriction applies for MVE
25592 in order to support Armv8.1-M Mainline instructions.
25593 Do not allow very large Neon structure opaque modes in general
25594 registers; they would use too many. */
25595 if (regno <= LAST_ARM_REGNUM)
25596 {
25597 if (ARM_NUM_REGS (mode) > 4)
25598 return false;
25599
25600 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25601 return true;
25602
25603 return !((TARGET_LDRD || TARGET_CDE)
25604 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25605 }
25606
25607 if (regno == FRAME_POINTER_REGNUM
25608 || regno == ARG_POINTER_REGNUM)
25609 /* We only allow integers in the fake hard registers. */
25610 return GET_MODE_CLASS (mode) == MODE_INT;
25611
25612 return false;
25613 }
25614
25615 /* Implement TARGET_MODES_TIEABLE_P. */
25616
25617 static bool
25618 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25619 {
25620 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25621 return true;
25622
25623 /* We specifically want to allow elements of "structure" modes to
25624 be tieable to the structure. This more general condition allows
25625 other rarer situations too. */
25626 if ((TARGET_NEON
25627 && (VALID_NEON_DREG_MODE (mode1)
25628 || VALID_NEON_QREG_MODE (mode1)
25629 || VALID_NEON_STRUCT_MODE (mode1))
25630 && (VALID_NEON_DREG_MODE (mode2)
25631 || VALID_NEON_QREG_MODE (mode2)
25632 || VALID_NEON_STRUCT_MODE (mode2)))
25633 || (TARGET_HAVE_MVE
25634 && (VALID_MVE_MODE (mode1)
25635 || VALID_MVE_STRUCT_MODE (mode1))
25636 && (VALID_MVE_MODE (mode2)
25637 || VALID_MVE_STRUCT_MODE (mode2))))
25638 return true;
25639
25640 return false;
25641 }
25642
25643 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25644 not used in arm mode. */
25645
25646 enum reg_class
25647 arm_regno_class (int regno)
25648 {
25649 if (regno == PC_REGNUM)
25650 return NO_REGS;
25651
25652 if (IS_VPR_REGNUM (regno))
25653 return VPR_REG;
25654
25655 if (TARGET_THUMB1)
25656 {
25657 if (regno == STACK_POINTER_REGNUM)
25658 return STACK_REG;
25659 if (regno == CC_REGNUM)
25660 return CC_REG;
25661 if (regno < 8)
25662 return LO_REGS;
25663 return HI_REGS;
25664 }
25665
25666 if (TARGET_THUMB2 && regno < 8)
25667 return LO_REGS;
25668
25669 if ( regno <= LAST_ARM_REGNUM
25670 || regno == FRAME_POINTER_REGNUM
25671 || regno == ARG_POINTER_REGNUM)
25672 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25673
25674 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25675 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25676
25677 if (IS_VFP_REGNUM (regno))
25678 {
25679 if (regno <= D7_VFP_REGNUM)
25680 return VFP_D0_D7_REGS;
25681 else if (regno <= LAST_LO_VFP_REGNUM)
25682 return VFP_LO_REGS;
25683 else
25684 return VFP_HI_REGS;
25685 }
25686
25687 if (IS_IWMMXT_REGNUM (regno))
25688 return IWMMXT_REGS;
25689
25690 if (IS_IWMMXT_GR_REGNUM (regno))
25691 return IWMMXT_GR_REGS;
25692
25693 return NO_REGS;
25694 }
25695
25696 /* Handle a special case when computing the offset
25697 of an argument from the frame pointer. */
25698 int
25699 arm_debugger_arg_offset (int value, rtx addr)
25700 {
25701 rtx_insn *insn;
25702
25703 /* We are only interested if dbxout_parms() failed to compute the offset. */
25704 if (value != 0)
25705 return 0;
25706
25707 /* We can only cope with the case where the address is held in a register. */
25708 if (!REG_P (addr))
25709 return 0;
25710
25711 /* If we are using the frame pointer to point at the argument, then
25712 an offset of 0 is correct. */
25713 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25714 return 0;
25715
25716 /* If we are using the stack pointer to point at the
25717 argument, then an offset of 0 is correct. */
25718 /* ??? Check this is consistent with thumb2 frame layout. */
25719 if ((TARGET_THUMB || !frame_pointer_needed)
25720 && REGNO (addr) == SP_REGNUM)
25721 return 0;
25722
25723 /* Oh dear. The argument is pointed to by a register rather
25724 than being held in a register, or being stored at a known
25725 offset from the frame pointer. Since GDB only understands
25726 those two kinds of argument we must translate the address
25727 held in the register into an offset from the frame pointer.
25728 We do this by searching through the insns for the function
25729 looking to see where this register gets its value. If the
25730 register is initialized from the frame pointer plus an offset
25731 then we are in luck and we can continue, otherwise we give up.
25732
25733 This code is exercised by producing debugging information
25734 for a function with arguments like this:
25735
25736 double func (double a, double b, int c, double d) {return d;}
25737
25738 Without this code the stab for parameter 'd' will be set to
25739 an offset of 0 from the frame pointer, rather than 8. */
25740
25741 /* The if() statement says:
25742
25743 If the insn is a normal instruction
25744 and if the insn is setting the value in a register
25745 and if the register being set is the register holding the address of the argument
25746 and if the address is computed by an addition
25747 that involves adding to a register
25748 which is the frame pointer
25749 a constant integer
25750
25751 then... */
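/* [Editorial illustration, not part of GCC.]  In RTL terms, the loop below
   looks for an insn of the shape

       (set (reg Rn)
            (plus (reg hard-frame-pointer) (const_int K)))

   where Rn is the register held in ADDR; K then becomes the offset reported
   to the debugger.  */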
25752
25753 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25754 {
25755 if ( NONJUMP_INSN_P (insn)
25756 && GET_CODE (PATTERN (insn)) == SET
25757 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25758 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25759 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25760 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25761 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25762 )
25763 {
25764 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25765
25766 break;
25767 }
25768 }
25769
25770 if (value == 0)
25771 {
25772 debug_rtx (addr);
25773 warning (0, "unable to compute real location of stacked parameter");
25774 value = 8; /* XXX magic hack */
25775 }
25776
25777 return value;
25778 }
25779 \f
25780 /* Implement TARGET_PROMOTED_TYPE. */
25781
25782 static tree
25783 arm_promoted_type (const_tree t)
25784 {
25785 if (SCALAR_FLOAT_TYPE_P (t)
25786 && TYPE_PRECISION (t) == 16
25787 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25788 return float_type_node;
25789 return NULL_TREE;
25790 }
25791
25792 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25793 This simply adds HFmode as a supported mode; even though we don't
25794 implement arithmetic on this type directly, it's supported by
25795 optabs conversions, much the way the double-word arithmetic is
25796 special-cased in the default hook. */
25797
25798 static bool
25799 arm_scalar_mode_supported_p (scalar_mode mode)
25800 {
25801 if (mode == HFmode)
25802 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25803 else if (ALL_FIXED_POINT_MODE_P (mode))
25804 return true;
25805 else
25806 return default_scalar_mode_supported_p (mode);
25807 }
25808
25809 /* Set the value of FLT_EVAL_METHOD.
25810 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25811
25812 0: evaluate all operations and constants, whose semantic type has at
25813 most the range and precision of type float, to the range and
25814 precision of float; evaluate all other operations and constants to
25815 the range and precision of the semantic type;
25816
25817 N, where _FloatN is a supported interchange floating type
25818 evaluate all operations and constants, whose semantic type has at
25819 most the range and precision of _FloatN type, to the range and
25820 precision of the _FloatN type; evaluate all other operations and
25821 constants to the range and precision of the semantic type;
25822
25823 If we have the ARMv8.2-A extensions then we support _Float16 in native
25824 precision, so we should set this to 16. Otherwise, we support the type,
25825 but want to evaluate expressions in float precision, so set this to
25826 0. */
25827
25828 static enum flt_eval_method
25829 arm_excess_precision (enum excess_precision_type type)
25830 {
25831 switch (type)
25832 {
25833 case EXCESS_PRECISION_TYPE_FAST:
25834 case EXCESS_PRECISION_TYPE_STANDARD:
25835 /* We can calculate either in 16-bit range and precision or
25836 32-bit range and precision. Make that decision based on whether
25837 we have native support for the ARMv8.2-A 16-bit floating-point
25838 instructions or not. */
25839 return (TARGET_VFP_FP16INST
25840 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25841 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25842 case EXCESS_PRECISION_TYPE_IMPLICIT:
25843 case EXCESS_PRECISION_TYPE_FLOAT16:
25844 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25845 default:
25846 gcc_unreachable ();
25847 }
25848 return FLT_EVAL_METHOD_UNPREDICTABLE;
25849 }
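/* [Editorial sketch, not part of GCC.]  Assuming -mfp16-format=ieee, the
   effect described above on user code is roughly:

       _Float16
       add_halves (_Float16 a, _Float16 b)
       {
         // Without the ARMv8.2-A FP16 instructions (FLT_EVAL_METHOD 0) the
         // sum is computed in 'float' and rounded back to _Float16 on
         // return; with +fp16 (FLT_EVAL_METHOD 16) it is computed directly
         // in half precision.
         return a + b;
       }
*/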
25850
25851
25852 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25853 _Float16 if we are using anything other than ieee format for 16-bit
25854 floating point. Otherwise, punt to the default implementation. */
25855 static opt_scalar_float_mode
25856 arm_floatn_mode (int n, bool extended)
25857 {
25858 if (!extended && n == 16)
25859 {
25860 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25861 return HFmode;
25862 return opt_scalar_float_mode ();
25863 }
25864
25865 return default_floatn_mode (n, extended);
25866 }
25867
25868
25869 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25870 not to early-clobber SRC registers in the process.
25871
25872 We assume that the operands described by SRC and DEST represent a
25873 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25874 number of components into which the copy has been decomposed. */
25875 void
25876 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25877 {
25878 unsigned int i;
25879
25880 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25881 || REGNO (operands[0]) < REGNO (operands[1]))
25882 {
25883 for (i = 0; i < count; i++)
25884 {
25885 operands[2 * i] = dest[i];
25886 operands[2 * i + 1] = src[i];
25887 }
25888 }
25889 else
25890 {
25891 for (i = 0; i < count; i++)
25892 {
25893 operands[2 * i] = dest[count - i - 1];
25894 operands[2 * i + 1] = src[count - i - 1];
25895 }
25896 }
25897 }
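/* [Editorial illustration, not part of GCC.]  Example of the ordering above,
   assuming a two-part copy whose destination starts one D register above an
   overlapping source (dest = {d1, d2}, src = {d0, d1}).  Since
   REGNO (operands[0]) > REGNO (operands[1]), the components are emitted in
   reverse order:

       d2 <- d1
       d1 <- d0

   so d1 is read before it is overwritten.  Non-overlapping or
   downward-moving copies keep the original order.  */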
25898
25899 /* Split operands into moves from op[1] + op[2] into op[0]. */
25900
25901 void
25902 neon_split_vcombine (rtx operands[3])
25903 {
25904 unsigned int dest = REGNO (operands[0]);
25905 unsigned int src1 = REGNO (operands[1]);
25906 unsigned int src2 = REGNO (operands[2]);
25907 machine_mode halfmode = GET_MODE (operands[1]);
25908 unsigned int halfregs = REG_NREGS (operands[1]);
25909 rtx destlo, desthi;
25910
25911 if (src1 == dest && src2 == dest + halfregs)
25912 {
25913 /* No-op move. Can't split to nothing; emit something. */
25914 emit_note (NOTE_INSN_DELETED);
25915 return;
25916 }
25917
25918 /* Preserve register attributes for variable tracking. */
25919 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25920 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25921 GET_MODE_SIZE (halfmode));
25922
25923 /* Special case of reversed high/low parts. Use VSWP. */
25924 if (src2 == dest && src1 == dest + halfregs)
25925 {
25926 rtx x = gen_rtx_SET (destlo, operands[1]);
25927 rtx y = gen_rtx_SET (desthi, operands[2]);
25928 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25929 return;
25930 }
25931
25932 if (!reg_overlap_mentioned_p (operands[2], destlo))
25933 {
25934 /* Try to avoid unnecessary moves if part of the result
25935 is in the right place already. */
25936 if (src1 != dest)
25937 emit_move_insn (destlo, operands[1]);
25938 if (src2 != dest + halfregs)
25939 emit_move_insn (desthi, operands[2]);
25940 }
25941 else
25942 {
25943 if (src2 != dest + halfregs)
25944 emit_move_insn (desthi, operands[2]);
25945 if (src1 != dest)
25946 emit_move_insn (destlo, operands[1]);
25947 }
25948 }
25949 \f
25950 /* Return the number (counting from 0) of
25951 the least significant set bit in MASK. */
25952
25953 inline static int
25954 number_of_first_bit_set (unsigned mask)
25955 {
25956 return ctz_hwi (mask);
25957 }
25958
25959 /* Like emit_multi_reg_push, but allowing for a different set of
25960 registers to be described as saved. MASK is the set of registers
25961 to be saved; REAL_REGS is the set of registers to be described as
25962 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25963
25964 static rtx_insn *
25965 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25966 {
25967 unsigned long regno;
25968 rtx par[10], tmp, reg;
25969 rtx_insn *insn;
25970 int i, j;
25971
25972 /* Build the parallel of the registers actually being stored. */
25973 for (i = 0; mask; ++i, mask &= mask - 1)
25974 {
25975 regno = ctz_hwi (mask);
25976 reg = gen_rtx_REG (SImode, regno);
25977
25978 if (i == 0)
25979 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25980 else
25981 tmp = gen_rtx_USE (VOIDmode, reg);
25982
25983 par[i] = tmp;
25984 }
25985
25986 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25987 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25988 tmp = gen_frame_mem (BLKmode, tmp);
25989 tmp = gen_rtx_SET (tmp, par[0]);
25990 par[0] = tmp;
25991
25992 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25993 insn = emit_insn (tmp);
25994
25995 /* Always build the stack adjustment note for unwind info. */
25996 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25997 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25998 par[0] = tmp;
25999
26000 /* Build the parallel of the registers recorded as saved for unwind. */
26001 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26002 {
26003 regno = ctz_hwi (real_regs);
26004 reg = gen_rtx_REG (SImode, regno);
26005
26006 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26007 tmp = gen_frame_mem (SImode, tmp);
26008 tmp = gen_rtx_SET (tmp, reg);
26009 RTX_FRAME_RELATED_P (tmp) = 1;
26010 par[j + 1] = tmp;
26011 }
26012
26013 if (j == 0)
26014 tmp = par[0];
26015 else
26016 {
26017 RTX_FRAME_RELATED_P (par[0]) = 1;
26018 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26019 }
26020
26021 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26022
26023 return insn;
26024 }
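/* [Editorial illustration, not part of GCC.]  For MASK = {r4, r5, lr} the
   insn built above has roughly the shape

       (parallel
         [(set (mem:BLK (pre_modify (reg sp)
                                    (plus (reg sp) (const_int -12))))
               (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
          (use (reg r5))
          (use (reg lr))])

   while the attached REG_FRAME_RELATED_EXPR note describes the same store
   as an explicit sp adjustment plus individual SImode saves of the
   REAL_REGS registers, for the benefit of the unwinder.  */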
26025
26026 /* Emit code to pop registers from the stack. F is the
26027 assembly file. MASK is the registers to pop. */
26028 static void
26029 thumb_pop (FILE *f, unsigned long mask)
26030 {
26031 int regno;
26032 int lo_mask = mask & 0xFF;
26033
26034 gcc_assert (mask);
26035
26036 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26037 {
26038 /* Special case. Do not generate a POP PC statement here, do it in
26039 thumb_exit(). */
26040 thumb_exit (f, -1);
26041 return;
26042 }
26043
26044 fprintf (f, "\tpop\t{");
26045
26046 /* Look at the low registers first. */
26047 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26048 {
26049 if (lo_mask & 1)
26050 {
26051 asm_fprintf (f, "%r", regno);
26052
26053 if ((lo_mask & ~1) != 0)
26054 fprintf (f, ", ");
26055 }
26056 }
26057
26058 if (mask & (1 << PC_REGNUM))
26059 {
26060 /* Catch popping the PC. */
26061 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26062 || IS_CMSE_ENTRY (arm_current_func_type ()))
26063 {
26064 /* The PC is never popped directly; instead
26065 it is popped into r3 and then BX is used. */
26066 fprintf (f, "}\n");
26067
26068 thumb_exit (f, -1);
26069
26070 return;
26071 }
26072 else
26073 {
26074 if (mask & 0xFF)
26075 fprintf (f, ", ");
26076
26077 asm_fprintf (f, "%r", PC_REGNUM);
26078 }
26079 }
26080
26081 fprintf (f, "}\n");
26082 }
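/* [Editorial illustration, not part of GCC.]  For MASK = {r4, r5, r7, pc} on
   a target with no interworking, backtrace, EH-return or CMSE requirements,
   the function above simply prints

       pop     {r4, r5, r7, pc}

   whereas the special cases hand the PC over to thumb_exit so the return
   can go through BX (or BXNS) instead.  */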
26083
26084 /* Generate code to return from a thumb function.
26085 If 'reg_containing_return_addr' is -1, then the return address is
26086 actually on the stack, at the stack pointer.
26087
26088 Note: do not forget to update length attribute of corresponding insn pattern
26089 when changing assembly output (eg. length attribute of epilogue_insns when
26090 updating Armv8-M Baseline Security Extensions register clearing
26091 sequences). */
26092 static void
26093 thumb_exit (FILE *f, int reg_containing_return_addr)
26094 {
26095 unsigned regs_available_for_popping;
26096 unsigned regs_to_pop;
26097 int pops_needed;
26098 unsigned available;
26099 unsigned required;
26100 machine_mode mode;
26101 int size;
26102 int restore_a4 = FALSE;
26103
26104 /* Compute the registers we need to pop. */
26105 regs_to_pop = 0;
26106 pops_needed = 0;
26107
26108 if (reg_containing_return_addr == -1)
26109 {
26110 regs_to_pop |= 1 << LR_REGNUM;
26111 ++pops_needed;
26112 }
26113
26114 if (TARGET_BACKTRACE)
26115 {
26116 /* Restore the (ARM) frame pointer and stack pointer. */
26117 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26118 pops_needed += 2;
26119 }
26120
26121 /* If there is nothing to pop then just emit the BX instruction and
26122 return. */
26123 if (pops_needed == 0)
26124 {
26125 if (crtl->calls_eh_return)
26126 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26127
26128 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26129 {
26130 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26131 emitted by cmse_nonsecure_entry_clear_before_return (). */
26132 if (!TARGET_HAVE_FPCXT_CMSE)
26133 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26134 reg_containing_return_addr);
26135 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26136 }
26137 else
26138 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26139 return;
26140 }
26141 /* Otherwise, if we are not supporting interworking, have not created a
26142 backtrace structure, and the function was not entered in ARM mode,
26143 just pop the return address straight into the PC. */
26144 else if (!TARGET_INTERWORK
26145 && !TARGET_BACKTRACE
26146 && !is_called_in_ARM_mode (current_function_decl)
26147 && !crtl->calls_eh_return
26148 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26149 {
26150 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26151 return;
26152 }
26153
26154 /* Find out how many of the (return) argument registers we can corrupt. */
26155 regs_available_for_popping = 0;
26156
26157 /* If returning via __builtin_eh_return, the bottom three registers
26158 all contain information needed for the return. */
26159 if (crtl->calls_eh_return)
26160 size = 12;
26161 else
26162 {
26163 /* We can deduce the registers used from the function's
26164 return value. This is more reliable than examining
26165 df_regs_ever_live_p () because that will be set if the register is
26166 ever used in the function, not just if the register is used
26167 to hold a return value. */
26168
26169 if (crtl->return_rtx != 0)
26170 mode = GET_MODE (crtl->return_rtx);
26171 else
26172 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26173
26174 size = GET_MODE_SIZE (mode);
26175
26176 if (size == 0)
26177 {
26178 /* In a void function we can use any argument register.
26179 In a function that returns a structure on the stack
26180 we can use the second and third argument registers. */
26181 if (mode == VOIDmode)
26182 regs_available_for_popping =
26183 (1 << ARG_REGISTER (1))
26184 | (1 << ARG_REGISTER (2))
26185 | (1 << ARG_REGISTER (3));
26186 else
26187 regs_available_for_popping =
26188 (1 << ARG_REGISTER (2))
26189 | (1 << ARG_REGISTER (3));
26190 }
26191 else if (size <= 4)
26192 regs_available_for_popping =
26193 (1 << ARG_REGISTER (2))
26194 | (1 << ARG_REGISTER (3));
26195 else if (size <= 8)
26196 regs_available_for_popping =
26197 (1 << ARG_REGISTER (3));
26198 }
26199
26200 /* Match registers to be popped with registers into which we pop them. */
26201 for (available = regs_available_for_popping,
26202 required = regs_to_pop;
26203 required != 0 && available != 0;
26204 available &= ~(available & - available),
26205 required &= ~(required & - required))
26206 -- pops_needed;
26207
26208 /* If we have any popping registers left over, remove them. */
26209 if (available > 0)
26210 regs_available_for_popping &= ~available;
26211
26212 /* Otherwise if we need another popping register we can use
26213 the fourth argument register. */
26214 else if (pops_needed)
26215 {
26216 /* If we have not found any free argument registers and
26217 reg a4 contains the return address, we must move it. */
26218 if (regs_available_for_popping == 0
26219 && reg_containing_return_addr == LAST_ARG_REGNUM)
26220 {
26221 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26222 reg_containing_return_addr = LR_REGNUM;
26223 }
26224 else if (size > 12)
26225 {
26226 /* Register a4 is being used to hold part of the return value,
26227 but we have dire need of a free, low register. */
26228 restore_a4 = TRUE;
26229
26230 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26231 }
26232
26233 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26234 {
26235 /* The fourth argument register is available. */
26236 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26237
26238 --pops_needed;
26239 }
26240 }
26241
26242 /* Pop as many registers as we can. */
26243 thumb_pop (f, regs_available_for_popping);
26244
26245 /* Process the registers we popped. */
26246 if (reg_containing_return_addr == -1)
26247 {
26248 /* The return address was popped into the lowest numbered register. */
26249 regs_to_pop &= ~(1 << LR_REGNUM);
26250
26251 reg_containing_return_addr =
26252 number_of_first_bit_set (regs_available_for_popping);
26253
26254 /* Remove this register from the mask of available registers, so that
26255 the return address will not be corrupted by further pops. */
26256 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26257 }
26258
26259 /* If we popped other registers then handle them here. */
26260 if (regs_available_for_popping)
26261 {
26262 int frame_pointer;
26263
26264 /* Work out which register currently contains the frame pointer. */
26265 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26266
26267 /* Move it into the correct place. */
26268 asm_fprintf (f, "\tmov\t%r, %r\n",
26269 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26270
26271 /* (Temporarily) remove it from the mask of popped registers. */
26272 regs_available_for_popping &= ~(1 << frame_pointer);
26273 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26274
26275 if (regs_available_for_popping)
26276 {
26277 int stack_pointer;
26278
26279 /* We popped the stack pointer as well,
26280 find the register that contains it. */
26281 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26282
26283 /* Move it into the stack register. */
26284 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26285
26286 /* At this point we have popped all necessary registers, so
26287 do not worry about restoring regs_available_for_popping
26288 to its correct value:
26289
26290 assert (pops_needed == 0)
26291 assert (regs_available_for_popping == (1 << frame_pointer))
26292 assert (regs_to_pop == (1 << STACK_POINTER)) */
26293 }
26294 else
26295 {
26296 /* Since we have just moved the popped value into the frame
26297 pointer, the popping register is available for reuse, and
26298 we know that we still have the stack pointer left to pop. */
26299 regs_available_for_popping |= (1 << frame_pointer);
26300 }
26301 }
26302
26303 /* If we still have registers left on the stack, but we no longer have
26304 any registers into which we can pop them, then we must move the return
26305 address into the link register and make available the register that
26306 contained it. */
26307 if (regs_available_for_popping == 0 && pops_needed > 0)
26308 {
26309 regs_available_for_popping |= 1 << reg_containing_return_addr;
26310
26311 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26312 reg_containing_return_addr);
26313
26314 reg_containing_return_addr = LR_REGNUM;
26315 }
26316
26317 /* If we have registers left on the stack then pop some more.
26318 We know that at most we will want to pop FP and SP. */
26319 if (pops_needed > 0)
26320 {
26321 int popped_into;
26322 int move_to;
26323
26324 thumb_pop (f, regs_available_for_popping);
26325
26326 /* We have popped either FP or SP.
26327 Move whichever one it is into the correct register. */
26328 popped_into = number_of_first_bit_set (regs_available_for_popping);
26329 move_to = number_of_first_bit_set (regs_to_pop);
26330
26331 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26332 --pops_needed;
26333 }
26334
26335 /* If we still have not popped everything then we must have only
26336 had one register available to us and we are now popping the SP. */
26337 if (pops_needed > 0)
26338 {
26339 int popped_into;
26340
26341 thumb_pop (f, regs_available_for_popping);
26342
26343 popped_into = number_of_first_bit_set (regs_available_for_popping);
26344
26345 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26346 /*
26347 assert (regs_to_pop == (1 << STACK_POINTER))
26348 assert (pops_needed == 1)
26349 */
26350 }
26351
26352 /* If necessary restore the a4 register. */
26353 if (restore_a4)
26354 {
26355 if (reg_containing_return_addr != LR_REGNUM)
26356 {
26357 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26358 reg_containing_return_addr = LR_REGNUM;
26359 }
26360
26361 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26362 }
26363
26364 if (crtl->calls_eh_return)
26365 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26366
26367 /* Return to caller. */
26368 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26369 {
26370 /* This is for the cases where LR is not being used to contain the return
26371 address. It may therefore contain information that we might not want
26372 to leak, hence it must be cleared. The value in R0 will never be a
26373 secret at this point, so it is safe to use it, see the clearing code
26374 in cmse_nonsecure_entry_clear_before_return (). */
26375 if (reg_containing_return_addr != LR_REGNUM)
26376 asm_fprintf (f, "\tmov\tlr, r0\n");
26377
26378 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26379 by cmse_nonsecure_entry_clear_before_return (). */
26380 if (!TARGET_HAVE_FPCXT_CMSE)
26381 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26382 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26383 }
26384 else
26385 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26386 }
26387 \f
26388 /* Scan INSN just before assembler is output for it.
26389 For Thumb-1, we track the status of the condition codes; this
26390 information is used in the cbranchsi4_insn pattern. */
26391 void
26392 thumb1_final_prescan_insn (rtx_insn *insn)
26393 {
26394 if (flag_print_asm_name)
26395 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26396 INSN_ADDRESSES (INSN_UID (insn)));
26397 /* Don't overwrite the previous setter when we get to a cbranch. */
26398 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26399 {
26400 enum attr_conds conds;
26401
26402 if (cfun->machine->thumb1_cc_insn)
26403 {
26404 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26405 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26406 CC_STATUS_INIT;
26407 }
26408 conds = get_attr_conds (insn);
26409 if (conds == CONDS_SET)
26410 {
26411 rtx set = single_set (insn);
26412 cfun->machine->thumb1_cc_insn = insn;
26413 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26414 cfun->machine->thumb1_cc_op1 = const0_rtx;
26415 cfun->machine->thumb1_cc_mode = CC_NZmode;
26416 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26417 {
26418 rtx src1 = XEXP (SET_SRC (set), 1);
26419 if (src1 == const0_rtx)
26420 cfun->machine->thumb1_cc_mode = CCmode;
26421 }
26422 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26423 {
26424 /* Record the src register operand instead of dest because
26425 cprop_hardreg pass propagates src. */
26426 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26427 }
26428 }
26429 else if (conds != CONDS_NOCOND)
26430 cfun->machine->thumb1_cc_insn = NULL_RTX;
26431 }
26432
26433 /* Check if unexpected far jump is used. */
26434 if (cfun->machine->lr_save_eliminated
26435 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26436 internal_error("Unexpected thumb1 far jump");
26437 }
26438
26439 int
26440 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26441 {
26442 unsigned HOST_WIDE_INT mask = 0xff;
26443 int i;
26444
26445 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26446 if (val == 0) /* XXX */
26447 return 0;
26448
26449 for (i = 0; i < 25; i++)
26450 if ((val & (mask << i)) == val)
26451 return 1;
26452
26453 return 0;
26454 }
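/* [Editorial illustration, not part of GCC.]  The test above accepts exactly
   those constants whose set bits fit in a single 8-bit window shifted left
   by 0..24, i.e. an 8-bit MOV immediate followed by a left shift:

       thumb_shiftable_const (0x00ff0000)  -> 1   (0xff << 16)
       thumb_shiftable_const (0x00012300)  -> 0   (spans more than 8 bits)
       thumb_shiftable_const (0)           -> 0   (rejected explicitly)
*/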
26455
26456 /* Returns nonzero if the current function contains,
26457 or might contain a far jump. */
26458 static int
26459 thumb_far_jump_used_p (void)
26460 {
26461 rtx_insn *insn;
26462 bool far_jump = false;
26463 unsigned int func_size = 0;
26464
26465 /* If we have already decided that far jumps may be used,
26466 do not bother checking again, and always return true even if
26467 it turns out that they are not being used. Once we have made
26468 the decision that far jumps are present (and that hence the link
26469 register will be pushed onto the stack) we cannot go back on it. */
26470 if (cfun->machine->far_jump_used)
26471 return 1;
26472
26473 /* If this function is not being called from the prologue/epilogue
26474 generation code then it must be being called from the
26475 INITIAL_ELIMINATION_OFFSET macro. */
26476 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26477 {
26478 /* In this case we know that we are being asked about the elimination
26479 of the arg pointer register. If that register is not being used,
26480 then there are no arguments on the stack, and we do not have to
26481 worry that a far jump might force the prologue to push the link
26482 register, changing the stack offsets. In this case we can just
26483 return false, since the presence of far jumps in the function will
26484 not affect stack offsets.
26485
26486 If the arg pointer is live (or if it was live, but has now been
26487 eliminated and so set to dead) then we do have to test to see if
26488 the function might contain a far jump. This test can lead to some
26489 false negatives, since before reload is completed, the length of
26490 branch instructions is not known, so gcc defaults to returning their
26491 longest length, which in turn sets the far jump attribute to true.
26492
26493 A false negative will not result in bad code being generated, but it
26494 will result in a needless push and pop of the link register. We
26495 hope that this does not occur too often.
26496
26497 If we need doubleword stack alignment this could affect the other
26498 elimination offsets so we can't risk getting it wrong. */
26499 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26500 cfun->machine->arg_pointer_live = 1;
26501 else if (!cfun->machine->arg_pointer_live)
26502 return 0;
26503 }
26504
26505 /* We should not change far_jump_used during or after reload, as there is
26506 no chance to change stack frame layout. */
26507 if (reload_in_progress || reload_completed)
26508 return 0;
26509
26510 /* Check to see if the function contains a branch
26511 insn with the far jump attribute set. */
26512 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26513 {
26514 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26515 {
26516 far_jump = true;
26517 }
26518 func_size += get_attr_length (insn);
26519 }
26520
26521 /* The far_jump attribute will always be true for thumb1 before the
26522 shorten_branch pass, so checking it before shorten_branch is not
26523 very useful.
26524
26525 The following heuristic tries to estimate more accurately whether a far
26526 jump will finally be used. The heuristic is very conservative, as there
26527 is no chance to roll back a decision not to use a far jump.
26528
26529 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26530 2-byte insn is associated with a 4-byte constant pool entry. Using
26531 function size 2048/3 as the threshold is conservative enough. */
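/* [Editorial note, not part of GCC.]  Worked example: a function whose insns
   total 700 bytes could occupy up to 3 * 700 = 2100 bytes once worst-case
   constant pools are counted, beyond the roughly +/-2 KB reach of a Thumb-1
   branch, so 3 * func_size >= 2048 conservatively keeps LR saved.  */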
26532 if (far_jump)
26533 {
26534 if ((func_size * 3) >= 2048)
26535 {
26536 /* Record the fact that we have decided that
26537 the function does use far jumps. */
26538 cfun->machine->far_jump_used = 1;
26539 return 1;
26540 }
26541 }
26542
26543 return 0;
26544 }
26545
26546 /* Return nonzero if FUNC must be entered in ARM mode. */
26547 static bool
26548 is_called_in_ARM_mode (tree func)
26549 {
26550 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26551
26552 /* Ignore the problem about functions whose address is taken. */
26553 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26554 return true;
26555
26556 #ifdef ARM_PE
26557 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26558 #else
26559 return false;
26560 #endif
26561 }
26562
26563 /* Given the stack offsets and register mask in OFFSETS, decide how
26564 many additional registers to push instead of subtracting a constant
26565 from SP. For epilogues the principle is the same except we use pop.
26566 FOR_PROLOGUE indicates which we're generating. */
26567 static int
26568 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26569 {
26570 HOST_WIDE_INT amount;
26571 unsigned long live_regs_mask = offsets->saved_regs_mask;
26572 /* Extract a mask of the ones we can give to the Thumb's push/pop
26573 instruction. */
26574 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26575 /* Then count how many other high registers will need to be pushed. */
26576 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26577 int n_free, reg_base, size;
26578
26579 if (!for_prologue && frame_pointer_needed)
26580 amount = offsets->locals_base - offsets->saved_regs;
26581 else
26582 amount = offsets->outgoing_args - offsets->saved_regs;
26583
26584 /* If the stack frame size is 512 exactly, we can save one load
26585 instruction, which should make this a win even when optimizing
26586 for speed. */
26587 if (!optimize_size && amount != 512)
26588 return 0;
26589
26590 /* Can't do this if there are high registers to push. */
26591 if (high_regs_pushed != 0)
26592 return 0;
26593
26594 /* Shouldn't do it in the prologue if no registers would normally
26595 be pushed at all. In the epilogue, also allow it if we'll have
26596 a pop insn for the PC. */
26597 if (l_mask == 0
26598 && (for_prologue
26599 || TARGET_BACKTRACE
26600 || (live_regs_mask & 1 << LR_REGNUM) == 0
26601 || TARGET_INTERWORK
26602 || crtl->args.pretend_args_size != 0))
26603 return 0;
26604
26605 /* Don't do this if thumb_expand_prologue wants to emit instructions
26606 between the push and the stack frame allocation. */
26607 if (for_prologue
26608 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26609 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26610 return 0;
26611
26612 reg_base = 0;
26613 n_free = 0;
26614 if (!for_prologue)
26615 {
26616 size = arm_size_return_regs ();
26617 reg_base = ARM_NUM_INTS (size);
26618 live_regs_mask >>= reg_base;
26619 }
26620
26621 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26622 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26623 {
26624 live_regs_mask >>= 1;
26625 n_free++;
26626 }
26627
26628 if (n_free == 0)
26629 return 0;
26630 gcc_assert (amount / 4 * 4 == amount);
26631
26632 if (amount >= 512 && (amount - n_free * 4) < 512)
26633 return (amount - 508) / 4;
26634 if (amount <= n_free * 4)
26635 return amount / 4;
26636 return 0;
26637 }
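/* [Editorial illustration, not part of GCC.]  Worked example: with a 512-byte
   frame (amount == 512) and one free low register (n_free == 1) the code
   above returns (512 - 508) / 4 == 1, so one extra register is pushed and
   the remaining adjustment of 508 bytes fits a single Thumb-1
   "sub sp, #imm", whose largest immediate is 508.  */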
26638
26639 /* The bits which aren't usefully expanded as rtl. */
26640 const char *
26641 thumb1_unexpanded_epilogue (void)
26642 {
26643 arm_stack_offsets *offsets;
26644 int regno;
26645 unsigned long live_regs_mask = 0;
26646 int high_regs_pushed = 0;
26647 int extra_pop;
26648 int had_to_push_lr;
26649 int size;
26650
26651 if (cfun->machine->return_used_this_function != 0)
26652 return "";
26653
26654 if (IS_NAKED (arm_current_func_type ()))
26655 return "";
26656
26657 offsets = arm_get_frame_offsets ();
26658 live_regs_mask = offsets->saved_regs_mask;
26659 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26660
26661 /* We can deduce the registers used from the function's return value.
26662 This is more reliable than examining df_regs_ever_live_p () because that
26663 will be set if the register is ever used in the function, not just if
26664 the register is used to hold a return value. */
26665 size = arm_size_return_regs ();
26666
26667 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26668 if (extra_pop > 0)
26669 {
26670 unsigned long extra_mask = (1 << extra_pop) - 1;
26671 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26672 }
26673
26674 /* The prologue may have pushed some high registers to use as
26675 work registers, e.g. the testsuite file:
26676 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26677 compiles to produce:
26678 push {r4, r5, r6, r7, lr}
26679 mov r7, r9
26680 mov r6, r8
26681 push {r6, r7}
26682 as part of the prologue. We have to undo that pushing here. */
26683
26684 if (high_regs_pushed)
26685 {
26686 unsigned long mask = live_regs_mask & 0xff;
26687 int next_hi_reg;
26688
26689 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26690
26691 if (mask == 0)
26692 /* Oh dear! We have no low registers into which we can pop
26693 high registers! */
26694 internal_error
26695 ("no low registers available for popping high registers");
26696
26697 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26698 if (live_regs_mask & (1 << next_hi_reg))
26699 break;
26700
26701 while (high_regs_pushed)
26702 {
26703 /* Find lo register(s) into which the high register(s) can
26704 be popped. */
26705 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26706 {
26707 if (mask & (1 << regno))
26708 high_regs_pushed--;
26709 if (high_regs_pushed == 0)
26710 break;
26711 }
26712
26713 if (high_regs_pushed == 0 && regno >= 0)
26714 mask &= ~((1 << regno) - 1);
26715
26716 /* Pop the values into the low register(s). */
26717 thumb_pop (asm_out_file, mask);
26718
26719 /* Move the value(s) into the high registers. */
26720 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26721 {
26722 if (mask & (1 << regno))
26723 {
26724 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26725 regno);
26726
26727 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26728 next_hi_reg--)
26729 if (live_regs_mask & (1 << next_hi_reg))
26730 break;
26731 }
26732 }
26733 }
26734 live_regs_mask &= ~0x0f00;
26735 }
26736
26737 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26738 live_regs_mask &= 0xff;
26739
26740 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26741 {
26742 /* Pop the return address into the PC. */
26743 if (had_to_push_lr)
26744 live_regs_mask |= 1 << PC_REGNUM;
26745
26746 /* Either no argument registers were pushed or a backtrace
26747 structure was created which includes an adjusted stack
26748 pointer, so just pop everything. */
26749 if (live_regs_mask)
26750 thumb_pop (asm_out_file, live_regs_mask);
26751
26752 /* We have either just popped the return address into the
26753 PC or it was kept in LR for the entire function.
26754 Note that thumb_pop has already called thumb_exit if the
26755 PC was in the list. */
26756 if (!had_to_push_lr)
26757 thumb_exit (asm_out_file, LR_REGNUM);
26758 }
26759 else
26760 {
26761 /* Pop everything but the return address. */
26762 if (live_regs_mask)
26763 thumb_pop (asm_out_file, live_regs_mask);
26764
26765 if (had_to_push_lr)
26766 {
26767 if (size > 12)
26768 {
26769 /* We have no free low regs, so save one. */
26770 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26771 LAST_ARG_REGNUM);
26772 }
26773
26774 /* Get the return address into a temporary register. */
26775 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26776
26777 if (size > 12)
26778 {
26779 /* Move the return address to lr. */
26780 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26781 LAST_ARG_REGNUM);
26782 /* Restore the low register. */
26783 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26784 IP_REGNUM);
26785 regno = LR_REGNUM;
26786 }
26787 else
26788 regno = LAST_ARG_REGNUM;
26789 }
26790 else
26791 regno = LR_REGNUM;
26792
26793 /* Remove the argument registers that were pushed onto the stack. */
26794 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26795 SP_REGNUM, SP_REGNUM,
26796 crtl->args.pretend_args_size);
26797
26798 thumb_exit (asm_out_file, regno);
26799 }
26800
26801 return "";
26802 }
26803
26804 /* Functions to save and restore machine-specific function data. */
26805 static struct machine_function *
26806 arm_init_machine_status (void)
26807 {
26808 struct machine_function *machine;
26809 machine = ggc_cleared_alloc<machine_function> ();
26810
26811 #if ARM_FT_UNKNOWN != 0
26812 machine->func_type = ARM_FT_UNKNOWN;
26813 #endif
26814 machine->static_chain_stack_bytes = -1;
26815 return machine;
26816 }
26817
26818 /* Return an RTX indicating where the return address to the
26819 calling function can be found. */
26820 rtx
26821 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26822 {
26823 if (count != 0)
26824 return NULL_RTX;
26825
26826 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26827 }
26828
26829 /* Do anything needed before RTL is emitted for each function. */
26830 void
26831 arm_init_expanders (void)
26832 {
26833 /* Arrange to initialize and mark the machine per-function status. */
26834 init_machine_status = arm_init_machine_status;
26835
26836 /* This is to stop the combine pass optimizing away the alignment
26837 adjustment of va_arg. */
26838 /* ??? It is claimed that this should not be necessary. */
26839 if (cfun)
26840 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26841 }
26842
26843 /* Return TRUE if FUNC would be compiled in a different instruction set mode (ARM vs. Thumb) from the current mode. */
26844
26845 bool
26846 arm_change_mode_p (tree func)
26847 {
26848 if (TREE_CODE (func) != FUNCTION_DECL)
26849 return false;
26850
26851 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26852
26853 if (!callee_tree)
26854 callee_tree = target_option_default_node;
26855
26856 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26857 int flags = callee_opts->x_target_flags;
26858
26859 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26860 }
26861
26862 /* Like arm_compute_initial_elimination_offset. Simpler because there
26863 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26864 to point at the base of the local variables after static stack
26865 space for a function has been allocated. */
26866
26867 HOST_WIDE_INT
26868 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26869 {
26870 arm_stack_offsets *offsets;
26871
26872 offsets = arm_get_frame_offsets ();
26873
26874 switch (from)
26875 {
26876 case ARG_POINTER_REGNUM:
26877 switch (to)
26878 {
26879 case STACK_POINTER_REGNUM:
26880 return offsets->outgoing_args - offsets->saved_args;
26881
26882 case FRAME_POINTER_REGNUM:
26883 return offsets->soft_frame - offsets->saved_args;
26884
26885 case ARM_HARD_FRAME_POINTER_REGNUM:
26886 return offsets->saved_regs - offsets->saved_args;
26887
26888 case THUMB_HARD_FRAME_POINTER_REGNUM:
26889 return offsets->locals_base - offsets->saved_args;
26890
26891 default:
26892 gcc_unreachable ();
26893 }
26894 break;
26895
26896 case FRAME_POINTER_REGNUM:
26897 switch (to)
26898 {
26899 case STACK_POINTER_REGNUM:
26900 return offsets->outgoing_args - offsets->soft_frame;
26901
26902 case ARM_HARD_FRAME_POINTER_REGNUM:
26903 return offsets->saved_regs - offsets->soft_frame;
26904
26905 case THUMB_HARD_FRAME_POINTER_REGNUM:
26906 return offsets->locals_base - offsets->soft_frame;
26907
26908 default:
26909 gcc_unreachable ();
26910 }
26911 break;
26912
26913 default:
26914 gcc_unreachable ();
26915 }
26916 }
26917
26918 /* Generate the function's prologue. */
26919
26920 void
26921 thumb1_expand_prologue (void)
26922 {
26923 rtx_insn *insn;
26924
26925 HOST_WIDE_INT amount;
26926 HOST_WIDE_INT size;
26927 arm_stack_offsets *offsets;
26928 unsigned long func_type;
26929 int regno;
26930 unsigned long live_regs_mask;
26931 unsigned long l_mask;
26932 unsigned high_regs_pushed = 0;
26933 bool lr_needs_saving;
26934
26935 func_type = arm_current_func_type ();
26936
26937 /* Naked functions don't have prologues. */
26938 if (IS_NAKED (func_type))
26939 {
26940 if (flag_stack_usage_info)
26941 current_function_static_stack_size = 0;
26942 return;
26943 }
26944
26945 if (IS_INTERRUPT (func_type))
26946 {
26947 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26948 return;
26949 }
26950
26951 if (is_called_in_ARM_mode (current_function_decl))
26952 emit_insn (gen_prologue_thumb1_interwork ());
26953
26954 offsets = arm_get_frame_offsets ();
26955 live_regs_mask = offsets->saved_regs_mask;
26956 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26957
26958 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26959 l_mask = live_regs_mask & 0x40ff;
26960 /* Then count how many other high registers will need to be pushed. */
26961 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26962
26963 if (crtl->args.pretend_args_size)
26964 {
26965 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26966
26967 if (cfun->machine->uses_anonymous_args)
26968 {
26969 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26970 unsigned long mask;
26971
26972 mask = 1ul << (LAST_ARG_REGNUM + 1);
26973 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26974
26975 insn = thumb1_emit_multi_reg_push (mask, 0);
26976 }
26977 else
26978 {
26979 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26980 stack_pointer_rtx, x));
26981 }
26982 RTX_FRAME_RELATED_P (insn) = 1;
26983 }
26984
26985 if (TARGET_BACKTRACE)
26986 {
26987 HOST_WIDE_INT offset = 0;
26988 unsigned work_register;
26989 rtx work_reg, x, arm_hfp_rtx;
26990
26991 /* We have been asked to create a stack backtrace structure.
26992 The code looks like this:
26993
26994 0 .align 2
26995 0 func:
26996 0 sub SP, #16 Reserve space for 4 registers.
26997 2 push {R7} Push low registers.
26998 4 add R7, SP, #20 Get the stack pointer before the push.
26999 6 str R7, [SP, #8] Store the stack pointer
27000 (before reserving the space).
27001 8 mov R7, PC Get hold of the start of this code + 12.
27002 10 str R7, [SP, #16] Store it.
27003 12 mov R7, FP Get hold of the current frame pointer.
27004 14 str R7, [SP, #4] Store it.
27005 16 mov R7, LR Get hold of the current return address.
27006 18 str R7, [SP, #12] Store it.
27007 20 add R7, SP, #16 Point at the start of the
27008 backtrace structure.
27009 22 mov FP, R7 Put this value into the frame pointer. */
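/* Editorial summary, reading off the stores above: the new frame pointer
ends up addressing the saved PC slot, with LR, the caller's SP and the
caller's FP at FP - 4, FP - 8 and FP - 12 respectively. */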
27010
27011 work_register = thumb_find_work_register (live_regs_mask);
27012 work_reg = gen_rtx_REG (SImode, work_register);
27013 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27014
27015 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27016 stack_pointer_rtx, GEN_INT (-16)));
27017 RTX_FRAME_RELATED_P (insn) = 1;
27018
27019 if (l_mask)
27020 {
27021 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27022 RTX_FRAME_RELATED_P (insn) = 1;
27023 lr_needs_saving = false;
27024
27025 offset = bit_count (l_mask) * UNITS_PER_WORD;
27026 }
27027
27028 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27029 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27030
27031 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27032 x = gen_frame_mem (SImode, x);
27033 emit_move_insn (x, work_reg);
27034
27035 /* Make sure that the instruction fetching the PC is in the right place
27036 to calculate "start of backtrace creation code + 12". */
27037 /* ??? The stores using the common WORK_REG ought to be enough to
27038 prevent the scheduler from doing anything weird. Failing that
27039 we could always move all of the following into an UNSPEC_VOLATILE. */
27040 if (l_mask)
27041 {
27042 x = gen_rtx_REG (SImode, PC_REGNUM);
27043 emit_move_insn (work_reg, x);
27044
27045 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27046 x = gen_frame_mem (SImode, x);
27047 emit_move_insn (x, work_reg);
27048
27049 emit_move_insn (work_reg, arm_hfp_rtx);
27050
27051 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27052 x = gen_frame_mem (SImode, x);
27053 emit_move_insn (x, work_reg);
27054 }
27055 else
27056 {
27057 emit_move_insn (work_reg, arm_hfp_rtx);
27058
27059 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27060 x = gen_frame_mem (SImode, x);
27061 emit_move_insn (x, work_reg);
27062
27063 x = gen_rtx_REG (SImode, PC_REGNUM);
27064 emit_move_insn (work_reg, x);
27065
27066 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27067 x = gen_frame_mem (SImode, x);
27068 emit_move_insn (x, work_reg);
27069 }
27070
27071 x = gen_rtx_REG (SImode, LR_REGNUM);
27072 emit_move_insn (work_reg, x);
27073
27074 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27075 x = gen_frame_mem (SImode, x);
27076 emit_move_insn (x, work_reg);
27077
27078 x = GEN_INT (offset + 12);
27079 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27080
27081 emit_move_insn (arm_hfp_rtx, work_reg);
27082 }
27083 /* Optimization: If we are not pushing any low registers but we are going
27084 to push some high registers then delay our first push. This will just
27085 be a push of LR and we can combine it with the push of the first high
27086 register. */
27087 else if ((l_mask & 0xff) != 0
27088 || (high_regs_pushed == 0 && lr_needs_saving))
27089 {
27090 unsigned long mask = l_mask;
27091 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27092 insn = thumb1_emit_multi_reg_push (mask, mask);
27093 RTX_FRAME_RELATED_P (insn) = 1;
27094 lr_needs_saving = false;
27095 }
27096
27097 if (high_regs_pushed)
27098 {
27099 unsigned pushable_regs;
27100 unsigned next_hi_reg;
27101 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27102 : crtl->args.info.nregs;
27103 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27104
27105 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27106 if (live_regs_mask & (1 << next_hi_reg))
27107 break;
27108
27109 /* Here we need to mask out registers used for passing arguments,
27110 even if they could otherwise be pushed, to avoid using them to
27111 stash the high registers; such stashing could clobber live
27112 argument values. */
27113 pushable_regs = l_mask & (~arg_regs_mask);
27114 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27115
27116 /* Normally, LR can be used as a scratch register once it has been
27117 saved; but if the function examines its own return address then
27118 the value is still live and we need to avoid using it. */
27119 bool return_addr_live
27120 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27121 LR_REGNUM);
27122
27123 if (lr_needs_saving || return_addr_live)
27124 pushable_regs &= ~(1 << LR_REGNUM);
27125
27126 if (pushable_regs == 0)
27127 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27128
27129 while (high_regs_pushed > 0)
27130 {
27131 unsigned long real_regs_mask = 0;
27132 unsigned long push_mask = 0;
27133
27134 for (regno = LR_REGNUM; regno >= 0; regno --)
27135 {
27136 if (pushable_regs & (1 << regno))
27137 {
27138 emit_move_insn (gen_rtx_REG (SImode, regno),
27139 gen_rtx_REG (SImode, next_hi_reg));
27140
27141 high_regs_pushed --;
27142 real_regs_mask |= (1 << next_hi_reg);
27143 push_mask |= (1 << regno);
27144
27145 if (high_regs_pushed)
27146 {
27147 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27148 next_hi_reg --)
27149 if (live_regs_mask & (1 << next_hi_reg))
27150 break;
27151 }
27152 else
27153 break;
27154 }
27155 }
27156
27157 /* If we had to find a work register and we have not yet
27158 saved the LR then add it to the list of regs to push. */
27159 if (lr_needs_saving)
27160 {
27161 push_mask |= 1 << LR_REGNUM;
27162 real_regs_mask |= 1 << LR_REGNUM;
27163 lr_needs_saving = false;
27164 /* If the return address is not live at this point, we
27165 can add LR to the list of registers that we can use
27166 for pushes. */
27167 if (!return_addr_live)
27168 pushable_regs |= 1 << LR_REGNUM;
27169 }
27170
27171 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27172 RTX_FRAME_RELATED_P (insn) = 1;
27173 }
27174 }
27175
27176 /* Load the pic register before setting the frame pointer,
27177 so we can use r7 as a temporary work register. */
27178 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27179 arm_load_pic_register (live_regs_mask, NULL_RTX);
27180
27181 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27182 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27183 stack_pointer_rtx);
27184
27185 size = offsets->outgoing_args - offsets->saved_args;
27186 if (flag_stack_usage_info)
27187 current_function_static_stack_size = size;
27188
27189 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27190 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27191 || flag_stack_clash_protection)
27192 && size)
27193 sorry ("%<-fstack-check=specific%> for Thumb-1");
27194
27195 amount = offsets->outgoing_args - offsets->saved_regs;
27196 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27197 if (amount)
27198 {
27199 if (amount < 512)
27200 {
27201 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27202 GEN_INT (- amount)));
27203 RTX_FRAME_RELATED_P (insn) = 1;
27204 }
27205 else
27206 {
27207 rtx reg, dwarf;
27208
27209 /* The stack decrement is too big for an immediate value in a single
27210 insn. In theory we could issue multiple subtracts, but after
27211 three of them it becomes more space efficient to place the full
27212 value in the constant pool and load into a register. (Also the
27213 ARM debugger really likes to see only one stack decrement per
27214 function). So instead we look for a scratch register into which
27215 we can load the decrement, and then we subtract this from the
27216 stack pointer. Unfortunately on the thumb the only available
27217 scratch registers are the argument registers, and we cannot use
27218 these as they may hold arguments to the function. Instead we
27219 attempt to locate a call preserved register which is used by this
27220 function. If we can find one, then we know that it will have
27221 been pushed at the start of the prologue and so we can corrupt
27222 it now. */
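/* Illustrative only: if r4 was saved by this function and amount is, say,
1024, the code below loads -1024 into r4 (typically from the literal pool)
and then emits "add sp, r4"; r4 itself is restored later by the epilogue's
pop. */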
27223 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27224 if (live_regs_mask & (1 << regno))
27225 break;
27226
27227 gcc_assert (regno <= LAST_LO_REGNUM);
27228
27229 reg = gen_rtx_REG (SImode, regno);
27230
27231 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27232
27233 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27234 stack_pointer_rtx, reg));
27235
27236 dwarf = gen_rtx_SET (stack_pointer_rtx,
27237 plus_constant (Pmode, stack_pointer_rtx,
27238 -amount));
27239 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27240 RTX_FRAME_RELATED_P (insn) = 1;
27241 }
27242 }
27243
27244 if (frame_pointer_needed)
27245 thumb_set_frame_pointer (offsets);
27246
27247 /* If we are profiling, make sure no instructions are scheduled before
27248 the call to mcount. Similarly if the user has requested no
27249 scheduling in the prolog. Similarly if we want non-call exceptions
27250 using the EABI unwinder, to prevent faulting instructions from being
27251 swapped with a stack adjustment. */
27252 if (crtl->profile || !TARGET_SCHED_PROLOG
27253 || (arm_except_unwind_info (&global_options) == UI_TARGET
27254 && cfun->can_throw_non_call_exceptions))
27255 emit_insn (gen_blockage ());
27256
27257 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27258 if (live_regs_mask & 0xff)
27259 cfun->machine->lr_save_eliminated = 0;
27260 }
27261
27262 /* Clear caller saved registers not used to pass return values and leaked
27263 condition flags before exiting a cmse_nonsecure_entry function. */
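/* Editorial summary of the code below: roughly, r0-r3 and ip start out as
candidates, plus any other caller-saved core or VFP registers (including
ones the user has made caller-saved); registers carrying the return value,
and the scratch registers needed to clear FPSCR when FPCXT_CMSE is
unavailable, are then removed from the set. */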
27264
27265 void
27266 cmse_nonsecure_entry_clear_before_return (void)
27267 {
27268 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27269 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27270 uint32_t padding_bits_to_clear = 0;
27271 auto_sbitmap to_clear_bitmap (maxregno + 1);
27272 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27273 tree result_type;
27274
27275 bitmap_clear (to_clear_bitmap);
27276 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27277 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27278
27279 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27280 registers. */
27281 if (clear_vfpregs)
27282 {
27283 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27284
27285 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27286
27287 if (!TARGET_HAVE_FPCXT_CMSE)
27288 {
27289 /* Make sure we don't clear the two scratch registers used to clear
27290 the relevant FPSCR bits in output_return_instruction. */
27291 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27292 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27293 emit_use (gen_rtx_REG (SImode, 4));
27294 bitmap_clear_bit (to_clear_bitmap, 4);
27295 }
27296 }
27297
27298 /* If the user has defined registers to be caller saved, these are no longer
27299 restored by the function before returning and must thus be cleared for
27300 security purposes. */
27301 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27302 {
27303 /* We do not touch registers that can be used to pass arguments as per
27304 the AAPCS, since these should never be made callee-saved by user
27305 options. */
27306 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27307 continue;
27308 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27309 continue;
27310 if (!callee_saved_reg_p (regno)
27311 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27312 || TARGET_HARD_FLOAT))
27313 bitmap_set_bit (to_clear_bitmap, regno);
27314 }
27315
27316 /* Make sure we do not clear the registers used to return the result. */
27317 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27318 if (!VOID_TYPE_P (result_type))
27319 {
27320 uint64_t to_clear_return_mask;
27321 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27322
27323 /* No need to check that we return in registers, because we don't
27324 support returning on stack yet. */
27325 gcc_assert (REG_P (result_rtl));
27326 to_clear_return_mask
27327 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27328 &padding_bits_to_clear);
27329 if (to_clear_return_mask)
27330 {
27331 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27332 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27333 {
27334 if (to_clear_return_mask & (1ULL << regno))
27335 bitmap_clear_bit (to_clear_bitmap, regno);
27336 }
27337 }
27338 }
27339
27340 if (padding_bits_to_clear != 0)
27341 {
27342 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27343 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27344
27345 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27346 returning a composite type, which only uses r0. Let's make sure that
27347 r1-r3 are cleared too. */
27348 bitmap_clear (to_clear_arg_regs_bitmap);
27349 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27350 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27351 }
27352
27353 /* Clear full registers that leak before returning. */
27354 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27355 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27356 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27357 clearing_reg);
27358 }
27359
27360 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27361 POP instruction can be generated. LR should be replaced by PC. All
27362 the checks required are already done by USE_RETURN_INSN (). Hence,
27363 all we really need to check here is whether a single register or
27364 multiple registers are to be popped. */
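/* In the multi-register case this amounts to replacing LR with PC in the
pop mask, so the epilogue ends in e.g. "pop {r4, r5, r6, r7, pc}"
(illustrative register list). */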
27365 void
27366 thumb2_expand_return (bool simple_return)
27367 {
27368 int i, num_regs;
27369 unsigned long saved_regs_mask;
27370 arm_stack_offsets *offsets;
27371
27372 offsets = arm_get_frame_offsets ();
27373 saved_regs_mask = offsets->saved_regs_mask;
27374
27375 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27376 if (saved_regs_mask & (1 << i))
27377 num_regs++;
27378
27379 if (!simple_return && saved_regs_mask)
27380 {
27381 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27382 functions, or adapt the code to handle it according to the ACLE. This
27383 path should not be reachable for cmse_nonsecure_entry functions, though
27384 we prefer to assert it for now to ensure that future code changes do not
27385 silently change this behavior. */
27386 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27387 if (num_regs == 1)
27388 {
27389 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27390 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27391 rtx addr = gen_rtx_MEM (SImode,
27392 gen_rtx_POST_INC (SImode,
27393 stack_pointer_rtx));
27394 set_mem_alias_set (addr, get_frame_alias_set ());
27395 XVECEXP (par, 0, 0) = ret_rtx;
27396 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27397 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27398 emit_jump_insn (par);
27399 }
27400 else
27401 {
27402 saved_regs_mask &= ~ (1 << LR_REGNUM);
27403 saved_regs_mask |= (1 << PC_REGNUM);
27404 arm_emit_multi_reg_pop (saved_regs_mask);
27405 }
27406 }
27407 else
27408 {
27409 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27410 cmse_nonsecure_entry_clear_before_return ();
27411 emit_jump_insn (simple_return_rtx);
27412 }
27413 }
27414
27415 void
27416 thumb1_expand_epilogue (void)
27417 {
27418 HOST_WIDE_INT amount;
27419 arm_stack_offsets *offsets;
27420 int regno;
27421
27422 /* Naked functions don't have epilogues. */
27423 if (IS_NAKED (arm_current_func_type ()))
27424 return;
27425
27426 offsets = arm_get_frame_offsets ();
27427 amount = offsets->outgoing_args - offsets->saved_regs;
27428
27429 if (frame_pointer_needed)
27430 {
27431 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27432 amount = offsets->locals_base - offsets->saved_regs;
27433 }
27434 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27435
27436 gcc_assert (amount >= 0);
27437 if (amount)
27438 {
27439 emit_insn (gen_blockage ());
27440
27441 if (amount < 512)
27442 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27443 GEN_INT (amount)));
27444 else
27445 {
27446 /* r3 is always free in the epilogue. */
27447 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27448
27449 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27450 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27451 }
27452 }
27453
27454 /* Emit a USE (stack_pointer_rtx), so that
27455 the stack adjustment will not be deleted. */
27456 emit_insn (gen_force_register_use (stack_pointer_rtx));
27457
27458 if (crtl->profile || !TARGET_SCHED_PROLOG)
27459 emit_insn (gen_blockage ());
27460
27461 /* Emit a clobber for each register that will be restored in the epilogue,
27462 so that flow2 will get register lifetimes correct. */
27463 for (regno = 0; regno < 13; regno++)
27464 if (reg_needs_saving_p (regno))
27465 emit_clobber (gen_rtx_REG (SImode, regno));
27466
27467 if (! df_regs_ever_live_p (LR_REGNUM))
27468 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27469
27470 /* Clear all caller-saved regs that are not used to return. */
27471 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27472 cmse_nonsecure_entry_clear_before_return ();
27473 }
27474
27475 /* Epilogue code for APCS frame. */
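/* Editorial note: with an APCS frame the register save area includes IP,
which holds the caller's stack pointer; the epilogue below unwinds relative
to the hard frame pointer and relies on that saved IP to rebuild SP (see
the saved_regs_mask adjustment further down). */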
27476 static void
27477 arm_expand_epilogue_apcs_frame (bool really_return)
27478 {
27479 unsigned long func_type;
27480 unsigned long saved_regs_mask;
27481 int num_regs = 0;
27482 int i;
27483 int floats_from_frame = 0;
27484 arm_stack_offsets *offsets;
27485
27486 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27487 func_type = arm_current_func_type ();
27488
27489 /* Get frame offsets for ARM. */
27490 offsets = arm_get_frame_offsets ();
27491 saved_regs_mask = offsets->saved_regs_mask;
27492
27493 /* Find the offset of the floating-point save area in the frame. */
27494 floats_from_frame
27495 = (offsets->saved_args
27496 + arm_compute_static_chain_stack_bytes ()
27497 - offsets->frame);
27498
27499 /* Compute how many core registers are saved and how far away the floats are. */
27500 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27501 if (saved_regs_mask & (1 << i))
27502 {
27503 num_regs++;
27504 floats_from_frame += 4;
27505 }
27506
27507 if (TARGET_VFP_BASE)
27508 {
27509 int start_reg;
27510 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27511
27512 /* The offset is from IP_REGNUM. */
27513 int saved_size = arm_get_vfp_saved_size ();
27514 if (saved_size > 0)
27515 {
27516 rtx_insn *insn;
27517 floats_from_frame += saved_size;
27518 insn = emit_insn (gen_addsi3 (ip_rtx,
27519 hard_frame_pointer_rtx,
27520 GEN_INT (-floats_from_frame)));
27521 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27522 ip_rtx, hard_frame_pointer_rtx);
27523 }
27524
27525 /* Generate VFP register multi-pop. */
27526 start_reg = FIRST_VFP_REGNUM;
27527
27528 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27529 /* Look for a case where a reg does not need restoring. */
27530 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27531 {
27532 if (start_reg != i)
27533 arm_emit_vfp_multi_reg_pop (start_reg,
27534 (i - start_reg) / 2,
27535 gen_rtx_REG (SImode,
27536 IP_REGNUM));
27537 start_reg = i + 2;
27538 }
27539
27540 /* Restore the remaining regs that we have discovered (or possibly
27541 even all of them, if the conditional in the for loop never
27542 fired). */
27543 if (start_reg != i)
27544 arm_emit_vfp_multi_reg_pop (start_reg,
27545 (i - start_reg) / 2,
27546 gen_rtx_REG (SImode, IP_REGNUM));
27547 }
27548
27549 if (TARGET_IWMMXT)
27550 {
27551 /* The frame pointer is guaranteed to be non-double-word aligned, as
27552 it is set to the double-word-aligned old_stack_pointer minus 4. */
27553 rtx_insn *insn;
27554 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27555
27556 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27557 if (reg_needs_saving_p (i))
27558 {
27559 rtx addr = gen_frame_mem (V2SImode,
27560 plus_constant (Pmode, hard_frame_pointer_rtx,
27561 - lrm_count * 4));
27562 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27563 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27564 gen_rtx_REG (V2SImode, i),
27565 NULL_RTX);
27566 lrm_count += 2;
27567 }
27568 }
27569
27570 /* saved_regs_mask should contain IP, which holds the old stack pointer
27571 from the time the activation record was created. Since SP and IP are
27572 adjacent registers, we can restore the value directly into SP. */
27573 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27574 saved_regs_mask &= ~(1 << IP_REGNUM);
27575 saved_regs_mask |= (1 << SP_REGNUM);
27576
27577 /* There are two registers left in saved_regs_mask - LR and PC. We
27578 only need to restore LR (the return address), but to
27579 save time we can load it directly into PC, unless we need a
27580 special function exit sequence, or we are not really returning. */
27581 if (really_return
27582 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27583 && !crtl->calls_eh_return)
27584 /* Delete LR from the register mask, so that LR on
27585 the stack is loaded into the PC in the register mask. */
27586 saved_regs_mask &= ~(1 << LR_REGNUM);
27587 else
27588 saved_regs_mask &= ~(1 << PC_REGNUM);
27589
27590 num_regs = bit_count (saved_regs_mask);
27591 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27592 {
27593 rtx_insn *insn;
27594 emit_insn (gen_blockage ());
27595 /* Unwind the stack to just below the saved registers. */
27596 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27597 hard_frame_pointer_rtx,
27598 GEN_INT (- 4 * num_regs)));
27599
27600 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27601 stack_pointer_rtx, hard_frame_pointer_rtx);
27602 }
27603
27604 arm_emit_multi_reg_pop (saved_regs_mask);
27605
27606 if (IS_INTERRUPT (func_type))
27607 {
27608 /* Interrupt handlers will have pushed the
27609 IP onto the stack, so restore it now. */
27610 rtx_insn *insn;
27611 rtx addr = gen_rtx_MEM (SImode,
27612 gen_rtx_POST_INC (SImode,
27613 stack_pointer_rtx));
27614 set_mem_alias_set (addr, get_frame_alias_set ());
27615 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27616 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27617 gen_rtx_REG (SImode, IP_REGNUM),
27618 NULL_RTX);
27619 }
27620
27621 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27622 return;
27623
27624 if (crtl->calls_eh_return)
27625 emit_insn (gen_addsi3 (stack_pointer_rtx,
27626 stack_pointer_rtx,
27627 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27628
27629 if (IS_STACKALIGN (func_type))
27630 /* Restore the original stack pointer. In the prologue, the stack was
27631 realigned and the original stack pointer was saved in r0. For details,
27632 see comment in arm_expand_prologue. */
27633 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27634
27635 emit_jump_insn (simple_return_rtx);
27636 }
27637
27638 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27639 function is not a sibcall. */
27640 void
27641 arm_expand_epilogue (bool really_return)
27642 {
27643 unsigned long func_type;
27644 unsigned long saved_regs_mask;
27645 int num_regs = 0;
27646 int i;
27647 int amount;
27648 arm_stack_offsets *offsets;
27649
27650 func_type = arm_current_func_type ();
27651
27652 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27653 let output_return_instruction take care of any instruction emission. */
27654 if (IS_NAKED (func_type)
27655 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27656 {
27657 if (really_return)
27658 emit_jump_insn (simple_return_rtx);
27659 return;
27660 }
27661
27662 /* If we are throwing an exception, then we really must be doing a
27663 return, so we can't tail-call. */
27664 gcc_assert (!crtl->calls_eh_return || really_return);
27665
27666 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27667 {
27668 arm_expand_epilogue_apcs_frame (really_return);
27669 return;
27670 }
27671
27672 /* Get frame offsets for ARM. */
27673 offsets = arm_get_frame_offsets ();
27674 saved_regs_mask = offsets->saved_regs_mask;
27675 num_regs = bit_count (saved_regs_mask);
27676
27677 if (frame_pointer_needed)
27678 {
27679 rtx_insn *insn;
27680 /* Restore stack pointer if necessary. */
27681 if (TARGET_ARM)
27682 {
27683 /* In ARM mode, frame pointer points to first saved register.
27684 Restore stack pointer to last saved register. */
27685 amount = offsets->frame - offsets->saved_regs;
27686
27687 /* Force out any pending memory operations that reference stacked data
27688 before stack de-allocation occurs. */
27689 emit_insn (gen_blockage ());
27690 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27691 hard_frame_pointer_rtx,
27692 GEN_INT (amount)));
27693 arm_add_cfa_adjust_cfa_note (insn, amount,
27694 stack_pointer_rtx,
27695 hard_frame_pointer_rtx);
27696
27697 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27698 deleted. */
27699 emit_insn (gen_force_register_use (stack_pointer_rtx));
27700 }
27701 else
27702 {
27703 /* In Thumb-2 mode, the frame pointer points to the last saved
27704 register. */
27705 amount = offsets->locals_base - offsets->saved_regs;
27706 if (amount)
27707 {
27708 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27709 hard_frame_pointer_rtx,
27710 GEN_INT (amount)));
27711 arm_add_cfa_adjust_cfa_note (insn, amount,
27712 hard_frame_pointer_rtx,
27713 hard_frame_pointer_rtx);
27714 }
27715
27716 /* Force out any pending memory operations that reference stacked data
27717 before stack de-allocation occurs. */
27718 emit_insn (gen_blockage ());
27719 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27720 hard_frame_pointer_rtx));
27721 arm_add_cfa_adjust_cfa_note (insn, 0,
27722 stack_pointer_rtx,
27723 hard_frame_pointer_rtx);
27724 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27725 deleted. */
27726 emit_insn (gen_force_register_use (stack_pointer_rtx));
27727 }
27728 }
27729 else
27730 {
27731 /* Pop off outgoing args and local frame to adjust stack pointer to
27732 last saved register. */
27733 amount = offsets->outgoing_args - offsets->saved_regs;
27734 if (amount)
27735 {
27736 rtx_insn *tmp;
27737 /* Force out any pending memory operations that reference stacked data
27738 before stack de-allocation occurs. */
27739 emit_insn (gen_blockage ());
27740 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27741 stack_pointer_rtx,
27742 GEN_INT (amount)));
27743 arm_add_cfa_adjust_cfa_note (tmp, amount,
27744 stack_pointer_rtx, stack_pointer_rtx);
27745 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27746 not deleted. */
27747 emit_insn (gen_force_register_use (stack_pointer_rtx));
27748 }
27749 }
27750
27751 if (TARGET_VFP_BASE)
27752 {
27753 /* Generate VFP register multi-pop. */
27754 int end_reg = LAST_VFP_REGNUM + 1;
27755
27756 /* Scan the registers in reverse order. We need to match
27757 any groupings made in the prologue and generate matching
27758 vldm operations. The need to match groups is because,
27759 unlike pop, vldm can only do consecutive regs. */
27760 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27761 /* Look for a case where a reg does not need restoring. */
27762 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27763 {
27764 /* Restore the regs discovered so far (from reg+2 to
27765 end_reg). */
27766 if (end_reg > i + 2)
27767 arm_emit_vfp_multi_reg_pop (i + 2,
27768 (end_reg - (i + 2)) / 2,
27769 stack_pointer_rtx);
27770 end_reg = i;
27771 }
27772
27773 /* Restore the remaining regs that we have discovered (or possibly
27774 even all of them, if the conditional in the for loop never
27775 fired). */
27776 if (end_reg > i + 2)
27777 arm_emit_vfp_multi_reg_pop (i + 2,
27778 (end_reg - (i + 2)) / 2,
27779 stack_pointer_rtx);
27780 }
27781
27782 if (TARGET_IWMMXT)
27783 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27784 if (reg_needs_saving_p (i))
27785 {
27786 rtx_insn *insn;
27787 rtx addr = gen_rtx_MEM (V2SImode,
27788 gen_rtx_POST_INC (SImode,
27789 stack_pointer_rtx));
27790 set_mem_alias_set (addr, get_frame_alias_set ());
27791 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27792 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27793 gen_rtx_REG (V2SImode, i),
27794 NULL_RTX);
27795 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27796 stack_pointer_rtx, stack_pointer_rtx);
27797 }
27798
27799 if (saved_regs_mask)
27800 {
27801 rtx insn;
27802 bool return_in_pc = false;
27803
27804 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27805 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27806 && !IS_CMSE_ENTRY (func_type)
27807 && !IS_STACKALIGN (func_type)
27808 && really_return
27809 && crtl->args.pretend_args_size == 0
27810 && saved_regs_mask & (1 << LR_REGNUM)
27811 && !crtl->calls_eh_return)
27812 {
27813 saved_regs_mask &= ~(1 << LR_REGNUM);
27814 saved_regs_mask |= (1 << PC_REGNUM);
27815 return_in_pc = true;
27816 }
27817
27818 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27819 {
27820 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27821 if (saved_regs_mask & (1 << i))
27822 {
27823 rtx addr = gen_rtx_MEM (SImode,
27824 gen_rtx_POST_INC (SImode,
27825 stack_pointer_rtx));
27826 set_mem_alias_set (addr, get_frame_alias_set ());
27827
27828 if (i == PC_REGNUM)
27829 {
27830 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27831 XVECEXP (insn, 0, 0) = ret_rtx;
27832 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27833 addr);
27834 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27835 insn = emit_jump_insn (insn);
27836 }
27837 else
27838 {
27839 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27840 addr));
27841 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27842 gen_rtx_REG (SImode, i),
27843 NULL_RTX);
27844 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27845 stack_pointer_rtx,
27846 stack_pointer_rtx);
27847 }
27848 }
27849 }
27850 else
27851 {
27852 if (TARGET_LDRD
27853 && current_tune->prefer_ldrd_strd
27854 && !optimize_function_for_size_p (cfun))
27855 {
27856 if (TARGET_THUMB2)
27857 thumb2_emit_ldrd_pop (saved_regs_mask);
27858 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27859 arm_emit_ldrd_pop (saved_regs_mask);
27860 else
27861 arm_emit_multi_reg_pop (saved_regs_mask);
27862 }
27863 else
27864 arm_emit_multi_reg_pop (saved_regs_mask);
27865 }
27866
27867 if (return_in_pc)
27868 return;
27869 }
27870
27871 amount
27872 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27873 if (amount)
27874 {
27875 int i, j;
27876 rtx dwarf = NULL_RTX;
27877 rtx_insn *tmp =
27878 emit_insn (gen_addsi3 (stack_pointer_rtx,
27879 stack_pointer_rtx,
27880 GEN_INT (amount)));
27881
27882 RTX_FRAME_RELATED_P (tmp) = 1;
27883
27884 if (cfun->machine->uses_anonymous_args)
27885 {
27886 /* Restore pretend args. Refer to arm_expand_prologue for how the
27887 pretend args were saved on the stack. */
27888 int num_regs = crtl->args.pretend_args_size / 4;
27889 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27890 for (j = 0, i = 0; j < num_regs; i++)
27891 if (saved_regs_mask & (1 << i))
27892 {
27893 rtx reg = gen_rtx_REG (SImode, i);
27894 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27895 j++;
27896 }
27897 REG_NOTES (tmp) = dwarf;
27898 }
27899 arm_add_cfa_adjust_cfa_note (tmp, amount,
27900 stack_pointer_rtx, stack_pointer_rtx);
27901 }
27902
27903 if (IS_CMSE_ENTRY (func_type))
27904 {
27905 /* CMSE_ENTRY always returns. */
27906 gcc_assert (really_return);
27907 /* Clear all caller-saved regs that are not used to return. */
27908 cmse_nonsecure_entry_clear_before_return ();
27909
27910 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27911 VLDR. */
27912 if (TARGET_HAVE_FPCXT_CMSE)
27913 {
27914 rtx_insn *insn;
27915
27916 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27917 GEN_INT (FPCXTNS_ENUM)));
27918 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27919 plus_constant (Pmode, stack_pointer_rtx, 4));
27920 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27921 RTX_FRAME_RELATED_P (insn) = 1;
27922 }
27923 }
27924
27925 if (!really_return)
27926 return;
27927
27928 if (crtl->calls_eh_return)
27929 emit_insn (gen_addsi3 (stack_pointer_rtx,
27930 stack_pointer_rtx,
27931 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27932
27933 if (IS_STACKALIGN (func_type))
27934 /* Restore the original stack pointer. In the prologue, the stack was
27935 realigned and the original stack pointer was saved in r0. For details,
27936 see comment in arm_expand_prologue. */
27937 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27938
27939 emit_jump_insn (simple_return_rtx);
27940 }
27941
27942 /* Implementation of insn prologue_thumb1_interwork. This is the first
27943 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27944
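/* Editorial sketch of the output below: roughly, for a function FOO this
emits (in ARM state)
orr ip, pc, #1
bx ip
followed by a ".code 16"/".thumb_func" label formed by prefixing FOO's
assembler name with ".real_start_of", which Thumb callers can branch to
directly. */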
27945 const char *
27946 thumb1_output_interwork (void)
27947 {
27948 const char * name;
27949 FILE *f = asm_out_file;
27950
27951 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27952 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27953 == SYMBOL_REF);
27954 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27955
27956 /* Generate code sequence to switch us into Thumb mode. */
27957 /* The .code 32 directive has already been emitted by
27958 ASM_DECLARE_FUNCTION_NAME. */
27959 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27960 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27961
27962 /* Generate a label, so that the debugger will notice the
27963 change in instruction sets. This label is also used by
27964 the assembler to bypass the ARM code when this function
27965 is called from a Thumb encoded function elsewhere in the
27966 same file. Hence the definition of STUB_NAME here must
27967 agree with the definition in gas/config/tc-arm.c. */
27968
27969 #define STUB_NAME ".real_start_of"
27970
27971 fprintf (f, "\t.code\t16\n");
27972 #ifdef ARM_PE
27973 if (arm_dllexport_name_p (name))
27974 name = arm_strip_name_encoding (name);
27975 #endif
27976 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27977 fprintf (f, "\t.thumb_func\n");
27978 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27979
27980 return "";
27981 }
27982
27983 /* Handle the case of a double word load into a low register from
27984 a computed memory address. The computed address may involve a
27985 register which is overwritten by the load. */
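/* E.g. when the destination's low word is also the base register
("r0:r1 = [r0]"), the high word is loaded first so the base address is not
clobbered before the second load (see the REGNO checks below). */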
27986 const char *
27987 thumb_load_double_from_address (rtx *operands)
27988 {
27989 rtx addr;
27990 rtx base;
27991 rtx offset;
27992 rtx arg1;
27993 rtx arg2;
27994
27995 gcc_assert (REG_P (operands[0]));
27996 gcc_assert (MEM_P (operands[1]));
27997
27998 /* Get the memory address. */
27999 addr = XEXP (operands[1], 0);
28000
28001 /* Work out how the memory address is computed. */
28002 switch (GET_CODE (addr))
28003 {
28004 case REG:
28005 operands[2] = adjust_address (operands[1], SImode, 4);
28006
28007 if (REGNO (operands[0]) == REGNO (addr))
28008 {
28009 output_asm_insn ("ldr\t%H0, %2", operands);
28010 output_asm_insn ("ldr\t%0, %1", operands);
28011 }
28012 else
28013 {
28014 output_asm_insn ("ldr\t%0, %1", operands);
28015 output_asm_insn ("ldr\t%H0, %2", operands);
28016 }
28017 break;
28018
28019 case CONST:
28020 /* Compute <address> + 4 for the high order load. */
28021 operands[2] = adjust_address (operands[1], SImode, 4);
28022
28023 output_asm_insn ("ldr\t%0, %1", operands);
28024 output_asm_insn ("ldr\t%H0, %2", operands);
28025 break;
28026
28027 case PLUS:
28028 arg1 = XEXP (addr, 0);
28029 arg2 = XEXP (addr, 1);
28030
28031 if (CONSTANT_P (arg1))
28032 base = arg2, offset = arg1;
28033 else
28034 base = arg1, offset = arg2;
28035
28036 gcc_assert (REG_P (base));
28037
28038 /* Catch the case of <address> = <reg> + <reg> */
28039 if (REG_P (offset))
28040 {
28041 int reg_offset = REGNO (offset);
28042 int reg_base = REGNO (base);
28043 int reg_dest = REGNO (operands[0]);
28044
28045 /* Add the base and offset registers together into the
28046 higher destination register. */
28047 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28048 reg_dest + 1, reg_base, reg_offset);
28049
28050 /* Load the lower destination register from the address in
28051 the higher destination register. */
28052 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28053 reg_dest, reg_dest + 1);
28054
28055 /* Load the higher destination register from its own address
28056 plus 4. */
28057 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28058 reg_dest + 1, reg_dest + 1);
28059 }
28060 else
28061 {
28062 /* Compute <address> + 4 for the high order load. */
28063 operands[2] = adjust_address (operands[1], SImode, 4);
28064
28065 /* If the computed address is held in the low order register
28066 then load the high order register first, otherwise always
28067 load the low order register first. */
28068 if (REGNO (operands[0]) == REGNO (base))
28069 {
28070 output_asm_insn ("ldr\t%H0, %2", operands);
28071 output_asm_insn ("ldr\t%0, %1", operands);
28072 }
28073 else
28074 {
28075 output_asm_insn ("ldr\t%0, %1", operands);
28076 output_asm_insn ("ldr\t%H0, %2", operands);
28077 }
28078 }
28079 break;
28080
28081 case LABEL_REF:
28082 /* With no registers to worry about we can just load the value
28083 directly. */
28084 operands[2] = adjust_address (operands[1], SImode, 4);
28085
28086 output_asm_insn ("ldr\t%H0, %2", operands);
28087 output_asm_insn ("ldr\t%0, %1", operands);
28088 break;
28089
28090 default:
28091 gcc_unreachable ();
28092 }
28093
28094 return "";
28095 }
28096
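/* Output a 2- or 3-word block copy using LDMIA/STMIA with write-back.
The register operands are sorted first so the register lists are emitted
in ascending order. */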
28097 const char *
28098 thumb_output_move_mem_multiple (int n, rtx *operands)
28099 {
28100 switch (n)
28101 {
28102 case 2:
28103 if (REGNO (operands[4]) > REGNO (operands[5]))
28104 std::swap (operands[4], operands[5]);
28105
28106 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28107 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28108 break;
28109
28110 case 3:
28111 if (REGNO (operands[4]) > REGNO (operands[5]))
28112 std::swap (operands[4], operands[5]);
28113 if (REGNO (operands[5]) > REGNO (operands[6]))
28114 std::swap (operands[5], operands[6]);
28115 if (REGNO (operands[4]) > REGNO (operands[5]))
28116 std::swap (operands[4], operands[5]);
28117
28118 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28119 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28120 break;
28121
28122 default:
28123 gcc_unreachable ();
28124 }
28125
28126 return "";
28127 }
28128
28129 /* Output a call-via instruction for thumb state. */
28130 const char *
28131 thumb_call_via_reg (rtx reg)
28132 {
28133 int regno = REGNO (reg);
28134 rtx *labelp;
28135
28136 gcc_assert (regno < LR_REGNUM);
28137
28138 /* If we are in the normal text section we can use a single instance
28139 per compilation unit. If we are doing function sections, then we need
28140 an entry per section, since we can't rely on reachability. */
28141 if (in_section == text_section)
28142 {
28143 thumb_call_reg_needed = 1;
28144
28145 if (thumb_call_via_label[regno] == NULL)
28146 thumb_call_via_label[regno] = gen_label_rtx ();
28147 labelp = thumb_call_via_label + regno;
28148 }
28149 else
28150 {
28151 if (cfun->machine->call_via[regno] == NULL)
28152 cfun->machine->call_via[regno] = gen_label_rtx ();
28153 labelp = cfun->machine->call_via + regno;
28154 }
28155
28156 output_asm_insn ("bl\t%a0", labelp);
28157 return "";
28158 }
28159
28160 /* Routines for generating rtl. */
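/* Expand a cpymemqi: copy LEN bytes in chunks of 12, 8, 4, 2 and finally
1 byte, using the cpymem12b/cpymem8b patterns (which also post-increment
the pointer registers) for the large chunks. */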
28161 void
28162 thumb_expand_cpymemqi (rtx *operands)
28163 {
28164 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28165 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28166 HOST_WIDE_INT len = INTVAL (operands[2]);
28167 HOST_WIDE_INT offset = 0;
28168
28169 while (len >= 12)
28170 {
28171 emit_insn (gen_cpymem12b (out, in, out, in));
28172 len -= 12;
28173 }
28174
28175 if (len >= 8)
28176 {
28177 emit_insn (gen_cpymem8b (out, in, out, in));
28178 len -= 8;
28179 }
28180
28181 if (len >= 4)
28182 {
28183 rtx reg = gen_reg_rtx (SImode);
28184 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28185 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28186 len -= 4;
28187 offset += 4;
28188 }
28189
28190 if (len >= 2)
28191 {
28192 rtx reg = gen_reg_rtx (HImode);
28193 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28194 plus_constant (Pmode, in,
28195 offset))));
28196 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28197 offset)),
28198 reg));
28199 len -= 2;
28200 offset += 2;
28201 }
28202
28203 if (len)
28204 {
28205 rtx reg = gen_reg_rtx (QImode);
28206 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28207 plus_constant (Pmode, in,
28208 offset))));
28209 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28210 offset)),
28211 reg));
28212 }
28213 }
28214
28215 void
28216 thumb_reload_out_hi (rtx *operands)
28217 {
28218 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28219 }
28220
28221 /* Return the length of a function name prefix
28222 that starts with the character 'c'. */
28223 static int
28224 arm_get_strip_length (int c)
28225 {
28226 switch (c)
28227 {
28228 ARM_NAME_ENCODING_LENGTHS
28229 default: return 0;
28230 }
28231 }
28232
28233 /* Return a pointer to a function's name with any
28234 and all prefix encodings stripped from it. */
28235 const char *
28236 arm_strip_name_encoding (const char *name)
28237 {
28238 int skip;
28239
28240 while ((skip = arm_get_strip_length (* name)))
28241 name += skip;
28242
28243 return name;
28244 }
28245
28246 /* If there is a '*' anywhere in the name's prefix, then
28247 emit the stripped name verbatim, otherwise prepend an
28248 underscore if leading underscores are being used. */
28249 void
28250 arm_asm_output_labelref (FILE *stream, const char *name)
28251 {
28252 int skip;
28253 int verbatim = 0;
28254
28255 while ((skip = arm_get_strip_length (* name)))
28256 {
28257 verbatim |= (*name == '*');
28258 name += skip;
28259 }
28260
28261 if (verbatim)
28262 fputs (name, stream);
28263 else
28264 asm_fprintf (stream, "%U%s", name);
28265 }
28266
28267 /* This function is used to emit an EABI tag and its associated value.
28268 We emit the numerical value of the tag in case the assembler does not
28269 support textual tags (e.g. gas prior to 2.20). If requested we include
28270 the tag name in a comment so that anyone reading the assembler output
28271 will know which tag is being set.
28272
28273 This function is not static because arm-c.cc needs it too. */
28274
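/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1)
emits ".eabi_attribute 19, 1", followed under -fverbose-asm by an
"@ Tag_ABI_FP_rounding" comment (illustrative; "@" is ARM's
ASM_COMMENT_START). */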
28275 void
28276 arm_emit_eabi_attribute (const char *name, int num, int val)
28277 {
28278 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28279 if (flag_verbose_asm || flag_debug_asm)
28280 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28281 asm_fprintf (asm_out_file, "\n");
28282 }
28283
28284 /* This function is used to print CPU tuning information as a comment
28285 in the assembler file. Pointers are not printed for now. */
28286
28287 void
28288 arm_print_tune_info (void)
28289 {
28290 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28291 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28292 current_tune->constant_limit);
28293 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28294 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28295 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28296 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28297 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28298 "prefetch.l1_cache_size:\t%d\n",
28299 current_tune->prefetch.l1_cache_size);
28300 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28301 "prefetch.l1_cache_line_size:\t%d\n",
28302 current_tune->prefetch.l1_cache_line_size);
28303 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28304 "prefer_constant_pool:\t%d\n",
28305 (int) current_tune->prefer_constant_pool);
28306 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28307 "branch_cost:\t(s:speed, p:predictable)\n");
28308 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28309 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28310 current_tune->branch_cost (false, false));
28311 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28312 current_tune->branch_cost (false, true));
28313 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28314 current_tune->branch_cost (true, false));
28315 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28316 current_tune->branch_cost (true, true));
28317 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28318 "prefer_ldrd_strd:\t%d\n",
28319 (int) current_tune->prefer_ldrd_strd);
28320 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28321 "logical_op_non_short_circuit:\t[%d,%d]\n",
28322 (int) current_tune->logical_op_non_short_circuit_thumb,
28323 (int) current_tune->logical_op_non_short_circuit_arm);
28324 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28325 "disparage_flag_setting_t16_encodings:\t%d\n",
28326 (int) current_tune->disparage_flag_setting_t16_encodings);
28327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28328 "string_ops_prefer_neon:\t%d\n",
28329 (int) current_tune->string_ops_prefer_neon);
28330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28331 "max_insns_inline_memset:\t%d\n",
28332 current_tune->max_insns_inline_memset);
28333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28334 current_tune->fusible_ops);
28335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28336 (int) current_tune->sched_autopref);
28337 }
28338
28339 /* The last set of target options used to emit .arch directives, etc. This
28340 could be a function-local static if it were not required to expose it as a
28341 root to the garbage collector. */
28342 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28343
28344 /* Print .arch and .arch_extension directives corresponding to the
28345 current architecture configuration. */
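/* Illustrative output for -march=armv7ve with a hard-float FPU (the exact
directives depend on the selected CPU/FPU):
.arch armv7-a
.arch_extension virt
.arch_extension idiv
.arch_extension sec
.arch_extension mp
.fpu neon-vfpv4
possibly preceded by a ".cpu" line and followed by further
".arch_extension" lines for remaining ISA features. */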
28346 static void
28347 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28348 {
28349 arm_build_target build_target;
28350 /* If the target options haven't changed since the last time we were called
28351 there is nothing to do. This should be sufficient to suppress the
28352 majority of redundant work. */
28353 if (last_asm_targ_options == targ_options)
28354 return;
28355
28356 last_asm_targ_options = targ_options;
28357
28358 build_target.isa = sbitmap_alloc (isa_num_bits);
28359 arm_configure_build_target (&build_target, targ_options, false);
28360
28361 if (build_target.core_name
28362 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28363 {
28364 const char* truncated_name
28365 = arm_rewrite_selected_cpu (build_target.core_name);
28366 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28367 }
28368
28369 const arch_option *arch
28370 = arm_parse_arch_option_name (all_architectures, "-march",
28371 build_target.arch_name);
28372 auto_sbitmap opt_bits (isa_num_bits);
28373
28374 gcc_assert (arch);
28375
28376 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28377 {
28378 /* Keep backward compatibility for assemblers which don't support
28379 armv7ve. Fortunately, none of the following extensions are reset
28380 by a .fpu directive. */
28381 asm_fprintf (stream, "\t.arch armv7-a\n");
28382 asm_fprintf (stream, "\t.arch_extension virt\n");
28383 asm_fprintf (stream, "\t.arch_extension idiv\n");
28384 asm_fprintf (stream, "\t.arch_extension sec\n");
28385 asm_fprintf (stream, "\t.arch_extension mp\n");
28386 }
28387 else
28388 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28389
28390 /* The .fpu directive will reset any architecture extensions from the
28391 assembler that relate to the fp/vector extensions. So put this out before
28392 any .arch_extension directives. */
28393 const char *fpu_name = (TARGET_SOFT_FLOAT
28394 ? "softvfp"
28395 : arm_identify_fpu_from_isa (build_target.isa));
28396 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28397
28398 if (!arch->common.extensions)
28399 return;
28400
28401 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28402 opt->name != NULL;
28403 opt++)
28404 {
28405 if (!opt->remove)
28406 {
28407 arm_initialize_isa (opt_bits, opt->isa_bits);
28408
28409 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28410 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28411 floating-point instructions are disabled. So the following check
28412 restricts the printing of ".arch_extension mve" and
28413 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28414 this special behaviour because the feature bits "mve" and
28415 "mve_float" are not part of the "fpu bits", so they are not cleared
28416 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28417 TARGET_HAVE_MVE_FLOAT are disabled. */
28418 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28419 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28420 && !TARGET_HAVE_MVE_FLOAT))
28421 continue;
28422
28423 /* If every feature bit of this option is set in the target ISA
28424 specification, print out the option name. However, don't print
28425 anything if all the bits are part of the FPU specification. */
28426 if (bitmap_subset_p (opt_bits, build_target.isa)
28427 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28428 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28429 }
28430 }
28431 }
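
/* As an illustration (not normative output): for a unit built with
   -march=armv7ve and a hard-float FPU, the function above emits
   something along the lines of

	.arch armv7-a
	.arch_extension virt
	.arch_extension idiv
	.arch_extension sec
	.arch_extension mp
	.fpu <fpu-name>
	.arch_extension <ext>	@ one line per remaining non-FPU extension

   where <fpu-name> and the trailing <ext> lines depend on the selected
   FPU and on which ISA bits are enabled.  */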
28432
28433 static void
28434 arm_file_start (void)
28435 {
28436 int val;
28437
28438 arm_print_asm_arch_directives
28439 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28440
28441 if (TARGET_BPABI)
28442 {
28443 /* If we have a named cpu, but the assembler does not support that
28444 name via .cpu, put out a cpu name attribute; but don't do this if the
28445 name starts with the fictitious prefix, 'generic'. */
28446 if (arm_active_target.core_name
28447 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28448 && !startswith (arm_active_target.core_name, "generic"))
28449 {
28450 const char* truncated_name
28451 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28452 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28453 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28454 truncated_name);
28455 }
28456
28457 if (print_tune_info)
28458 arm_print_tune_info ();
28459
28460 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28461 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28462
28463 if (TARGET_HARD_FLOAT_ABI)
28464 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28465
28466 /* Some of these attributes only apply when the corresponding features
28467 are used. However we don't have any easy way of figuring this out.
28468 Conservatively record the setting that would have been used. */
28469
28470 if (flag_rounding_math)
28471 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28472
28473 if (!flag_unsafe_math_optimizations)
28474 {
28475 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28476 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28477 }
28478 if (flag_signaling_nans)
28479 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28480
28481 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28482 flag_finite_math_only ? 1 : 3);
28483
28484 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28485 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28486 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28487 flag_short_enums ? 1 : 2);
28488
28489 /* Tag_ABI_optimization_goals. */
28490 if (optimize_size)
28491 val = 4;
28492 else if (optimize >= 2)
28493 val = 2;
28494 else if (optimize)
28495 val = 1;
28496 else
28497 val = 6;
28498 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28499
28500 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28501 unaligned_access);
28502
28503 if (arm_fp16_format)
28504 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28505 (int) arm_fp16_format);
28506
28507 if (arm_lang_output_object_attributes_hook)
28508 arm_lang_output_object_attributes_hook();
28509 }
28510
28511 default_file_start ();
28512 }
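
/* For example (illustrative only): -Os selects val == 4 for
   Tag_ABI_optimization_goals above, -O2 and higher select 2, -O1
   selects 1 and -O0 selects 6, so a typical -O2 build carries roughly

	.eabi_attribute 30, 2

   in its object file; the exact textual form of each attribute comes
   from arm_emit_eabi_attribute.  */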
28513
28514 static void
28515 arm_file_end (void)
28516 {
28517 int regno;
28518
28519 /* Just in case the last function output in the assembler had non-default
28520 architecture directives, we force the assembler state back to the default
28521 set, so that any 'calculated' build attributes are based on the default
28522 options rather than the special options for that function. */
28523 arm_print_asm_arch_directives
28524 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28525
28526 if (NEED_INDICATE_EXEC_STACK)
28527 /* Add .note.GNU-stack. */
28528 file_end_indicate_exec_stack ();
28529
28530 if (! thumb_call_reg_needed)
28531 return;
28532
28533 switch_to_section (text_section);
28534 asm_fprintf (asm_out_file, "\t.code 16\n");
28535 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28536
28537 for (regno = 0; regno < LR_REGNUM; regno++)
28538 {
28539 rtx label = thumb_call_via_label[regno];
28540
28541 if (label != 0)
28542 {
28543 targetm.asm_out.internal_label (asm_out_file, "L",
28544 CODE_LABEL_NUMBER (label));
28545 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28546 }
28547 }
28548 }
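
/* Illustrative output of the loop above (exact directives depend on the
   target macros): if a call veneer through r3 was needed, the end of the
   text section gains something like

	.code 16
	.align	1
   .L<n>:
	bx	r3

   with one label/bx pair per register that was actually used.  */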
28549
28550 #ifndef ARM_PE
28551 /* Symbols in the text segment can be accessed without indirecting via the
28552 constant pool; it may take an extra binary operation, but this is still
28553 faster than indirecting via memory. Don't do this when not optimizing,
28554 since we won't be calculating all of the offsets necessary to do this
28555 simplification. */
28556
28557 static void
28558 arm_encode_section_info (tree decl, rtx rtl, int first)
28559 {
28560 if (optimize > 0 && TREE_CONSTANT (decl))
28561 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28562
28563 default_encode_section_info (decl, rtl, first);
28564 }
28565 #endif /* !ARM_PE */
28566
28567 static void
28568 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28569 {
28570 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28571 && !strcmp (prefix, "L"))
28572 {
28573 arm_ccfsm_state = 0;
28574 arm_target_insn = NULL;
28575 }
28576 default_internal_label (stream, prefix, labelno);
28577 }
28578
28579 /* Define classes to generate code as RTL or output asm to a file.
28580 Using templates then allows us to use the same code to output
28581 sequences in the two formats. */
28582 class thumb1_const_rtl
28583 {
28584 public:
28585 thumb1_const_rtl (rtx dst) : dst (dst) {}
28586
28587 void mov (HOST_WIDE_INT val)
28588 {
28589 emit_set_insn (dst, GEN_INT (val));
28590 }
28591
28592 void add (HOST_WIDE_INT val)
28593 {
28594 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28595 }
28596
28597 void ashift (HOST_WIDE_INT shift)
28598 {
28599 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28600 }
28601
28602 void neg ()
28603 {
28604 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28605 }
28606
28607 private:
28608 rtx dst;
28609 };
28610
28611 class thumb1_const_print
28612 {
28613 public:
28614 thumb1_const_print (FILE *f, int regno)
28615 {
28616 t_file = f;
28617 dst_regname = reg_names[regno];
28618 }
28619
28620 void mov (HOST_WIDE_INT val)
28621 {
28622 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28623 dst_regname, val);
28624 }
28625
28626 void add (HOST_WIDE_INT val)
28627 {
28628 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28629 dst_regname, val);
28630 }
28631
28632 void ashift (HOST_WIDE_INT shift)
28633 {
28634 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28635 dst_regname, shift);
28636 }
28637
28638 void neg ()
28639 {
28640 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28641 }
28642
28643 private:
28644 FILE *t_file;
28645 const char *dst_regname;
28646 };
28647
28648 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28649 Avoid generating useless code when one of the bytes is zero. */
28650 template <class T>
28651 void
28652 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28653 {
28654 bool mov_done_p = false;
28655 unsigned HOST_WIDE_INT val = op1;
28656 int shift = 0;
28657 int i;
28658
28659 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28660
28661 if (val <= 255)
28662 {
28663 dst.mov (val);
28664 return;
28665 }
28666
28667 /* For negative numbers with the top nine bits set, build the
28668 negation of OP1 and then negate the result; this is generally
28669 shorter and never longer. */
28670 if ((val & 0xFF800000) == 0xFF800000)
28671 {
28672 thumb1_gen_const_int_1 (dst, -op1);
28673 dst.neg ();
28674 return;
28675 }
28676
28677 /* In the general case, we need 7 instructions to build
28678 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28679 do better if VAL is small enough, or
28680 right-shiftable by a suitable amount. If the
28681 right shift lets us encode at least one byte fewer,
28682 it's worth it: we save an adds and an lsls at the
28683 expense of a final lsls. */
28684 int final_shift = number_of_first_bit_set (val);
28685
28686 int leading_zeroes = clz_hwi (val);
28687 int number_of_bytes_needed
28688 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28689 / BITS_PER_UNIT) + 1;
28690 int number_of_bytes_needed2
28691 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28692 / BITS_PER_UNIT) + 1;
28693
28694 if (number_of_bytes_needed2 < number_of_bytes_needed)
28695 val >>= final_shift;
28696 else
28697 final_shift = 0;
28698
28699 /* If we are in a very small range, we can use either a single movs
28700 or movs+adds. */
28701 if (val <= 510)
28702 {
28703 if (val > 255)
28704 {
28705 unsigned HOST_WIDE_INT high = val - 255;
28706
28707 dst.mov (high);
28708 dst.add (255);
28709 }
28710 else
28711 dst.mov (val);
28712
28713 if (final_shift > 0)
28714 dst.ashift (final_shift);
28715 }
28716 else
28717 {
28718 /* General case, emit upper 3 bytes as needed. */
28719 for (i = 0; i < 3; i++)
28720 {
28721 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28722
28723 if (byte)
28724 {
28725 /* We are about to emit new bits, stop accumulating a
28726 shift amount, and left-shift only if we have already
28727 emitted some upper bits. */
28728 if (mov_done_p)
28729 {
28730 dst.ashift (shift);
28731 dst.add (byte);
28732 }
28733 else
28734 dst.mov (byte);
28735
28736 /* Stop accumulating shift amount since we've just
28737 emitted some bits. */
28738 shift = 0;
28739
28740 mov_done_p = true;
28741 }
28742
28743 if (mov_done_p)
28744 shift += 8;
28745 }
28746
28747 /* Emit lower byte. */
28748 if (!mov_done_p)
28749 dst.mov (val & 0xff);
28750 else
28751 {
28752 dst.ashift (shift);
28753 if (val & 0xff)
28754 dst.add (val & 0xff);
28755 }
28756
28757 if (final_shift > 0)
28758 dst.ashift (final_shift);
28759 }
28760 }
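
/* A minimal stand-alone sketch (not built as part of GCC) of the
   byte-at-a-time scheme used in the general case above: it prints the
   movs/lsls/adds sequence for a 32-bit constant, omitting the negation,
   small-range and trailing-zero optimisations.  The register name r3 is
   an arbitrary choice for the example.  */
#if 0
#include <cstdint>
#include <cstdio>

static void
emit_const_sketch (uint32_t val)
{
  bool mov_done = false;
  int shift = 0;

  /* Upper three bytes, most significant first.  */
  for (int i = 0; i < 3; i++)
    {
      unsigned byte = (val >> (8 * (3 - i))) & 0xff;
      if (byte)
	{
	  if (mov_done)
	    {
	      std::printf ("\tlsls\tr3, #%d\n", shift);
	      std::printf ("\tadds\tr3, #%u\n", byte);
	    }
	  else
	    std::printf ("\tmovs\tr3, #%u\n", byte);
	  shift = 0;
	  mov_done = true;
	}
      if (mov_done)
	shift += 8;
    }

  /* Lowest byte.  */
  if (!mov_done)
    std::printf ("\tmovs\tr3, #%u\n", (unsigned) (val & 0xff));
  else
    {
      std::printf ("\tlsls\tr3, #%d\n", shift);
      if (val & 0xff)
	std::printf ("\tadds\tr3, #%u\n", (unsigned) (val & 0xff));
    }
}

int
main ()
{
  /* Prints: movs #1, lsls #8, adds #34, lsls #8, adds #51,
     i.e. it builds 0x00012233.  */
  emit_const_sketch (0x00012233);
  return 0;
}
#endif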
28761
28762 /* Proxies for thumb1.md, since the thumb1_const_print and
28763 thumb1_const_rtl classes are not exported. */
28764 void
28765 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28766 {
28767 thumb1_const_rtl t (dst);
28768 thumb1_gen_const_int_1 (t, op1);
28769 }
28770
28771 void
28772 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28773 {
28774 thumb1_const_print t (asm_out_file, REGNO (dst));
28775 thumb1_gen_const_int_1 (t, op1);
28776 }
28777
28778 /* Output code to add DELTA to the first argument, and then jump
28779 to FUNCTION. Used for C++ multiple inheritance. */
28780
28781 static void
28782 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28783 HOST_WIDE_INT, tree function)
28784 {
28785 static int thunk_label = 0;
28786 char label[256];
28787 char labelpc[256];
28788 int mi_delta = delta;
28789 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28790 int shift = 0;
28791 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28792 ? 1 : 0);
28793 if (mi_delta < 0)
28794 mi_delta = - mi_delta;
28795
28796 final_start_function (emit_barrier (), file, 1);
28797
28798 if (TARGET_THUMB1)
28799 {
28800 int labelno = thunk_label++;
28801 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28802 /* Thunks are entered in arm mode when available. */
28803 if (TARGET_THUMB1_ONLY)
28804 {
28805 /* push r3 so we can use it as a temporary. */
28806 /* TODO: Omit this save if r3 is not used. */
28807 fputs ("\tpush {r3}\n", file);
28808
28809 /* With -mpure-code, we cannot load the address from the
28810 constant pool: we build it explicitly. */
28811 if (target_pure_code)
28812 {
28813 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28814 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28815 fputc ('\n', file);
28816 fputs ("\tlsls r3, #8\n", file);
28817 fputs ("\tadds\tr3, #:upper0_7:#", file);
28818 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28819 fputc ('\n', file);
28820 fputs ("\tlsls r3, #8\n", file);
28821 fputs ("\tadds\tr3, #:lower8_15:#", file);
28822 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28823 fputc ('\n', file);
28824 fputs ("\tlsls r3, #8\n", file);
28825 fputs ("\tadds\tr3, #:lower0_7:#", file);
28826 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28827 fputc ('\n', file);
28828 }
28829 else
28830 fputs ("\tldr\tr3, ", file);
28831 }
28832 else
28833 {
28834 fputs ("\tldr\tr12, ", file);
28835 }
28836
28837 if (!target_pure_code)
28838 {
28839 assemble_name (file, label);
28840 fputc ('\n', file);
28841 }
28842
28843 if (flag_pic)
28844 {
28845 /* If we are generating PIC, the ldr instruction below loads
28846 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28847 the address of the add + 8, so we have:
28848
28849 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28850 = target + 1.
28851
28852 Note that we have "+ 1" because some versions of GNU ld
28853 don't set the low bit of the result for R_ARM_REL32
28854 relocations against thumb function symbols.
28855 On ARMv6M this is +4, not +8. */
28856 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28857 assemble_name (file, labelpc);
28858 fputs (":\n", file);
28859 if (TARGET_THUMB1_ONLY)
28860 {
28861 /* This is 2 insns after the start of the thunk, so we know it
28862 is 4-byte aligned. */
28863 fputs ("\tadd\tr3, pc, r3\n", file);
28864 fputs ("\tmov r12, r3\n", file);
28865 }
28866 else
28867 fputs ("\tadd\tr12, pc, r12\n", file);
28868 }
28869 else if (TARGET_THUMB1_ONLY)
28870 fputs ("\tmov r12, r3\n", file);
28871 }
28872 if (TARGET_THUMB1_ONLY)
28873 {
28874 if (mi_delta > 255)
28875 {
28876 /* With -mpure-code, we cannot load MI_DELTA from the
28877 constant pool: we build it explicitly. */
28878 if (target_pure_code)
28879 {
28880 thumb1_const_print r3 (file, 3);
28881 thumb1_gen_const_int_1 (r3, mi_delta);
28882 }
28883 else
28884 {
28885 fputs ("\tldr\tr3, ", file);
28886 assemble_name (file, label);
28887 fputs ("+4\n", file);
28888 }
28889 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28890 mi_op, this_regno, this_regno);
28891 }
28892 else if (mi_delta != 0)
28893 {
28894 /* Thumb1 unified syntax requires the s suffix in the instruction name
28895 when one of the operands is an immediate. */
28896 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28897 mi_op, this_regno, this_regno,
28898 mi_delta);
28899 }
28900 }
28901 else
28902 {
28903 /* TODO: Use movw/movt for large constants when available. */
28904 while (mi_delta != 0)
28905 {
28906 if ((mi_delta & (3 << shift)) == 0)
28907 shift += 2;
28908 else
28909 {
28910 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28911 mi_op, this_regno, this_regno,
28912 mi_delta & (0xff << shift));
28913 mi_delta &= ~(0xff << shift);
28914 shift += 8;
28915 }
28916 }
28917 }
28918 if (TARGET_THUMB1)
28919 {
28920 if (TARGET_THUMB1_ONLY)
28921 fputs ("\tpop\t{r3}\n", file);
28922
28923 fprintf (file, "\tbx\tr12\n");
28924
28925 /* With -mpure-code, we don't need to emit literals for the
28926 function address and delta since we emitted code to build
28927 them. */
28928 if (!target_pure_code)
28929 {
28930 ASM_OUTPUT_ALIGN (file, 2);
28931 assemble_name (file, label);
28932 fputs (":\n", file);
28933 if (flag_pic)
28934 {
28935 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28936 rtx tem = XEXP (DECL_RTL (function), 0);
28937 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28938 pipeline offset is four rather than eight. Adjust the offset
28939 accordingly. */
28940 tem = plus_constant (GET_MODE (tem), tem,
28941 TARGET_THUMB1_ONLY ? -3 : -7);
28942 tem = gen_rtx_MINUS (GET_MODE (tem),
28943 tem,
28944 gen_rtx_SYMBOL_REF (Pmode,
28945 ggc_strdup (labelpc)));
28946 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28947 }
28948 else
28949 /* Output ".word .LTHUNKn". */
28950 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28951
28952 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28953 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28954 }
28955 }
28956 else
28957 {
28958 fputs ("\tb\t", file);
28959 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28960 if (NEED_PLT_RELOC)
28961 fputs ("(PLT)", file);
28962 fputc ('\n', file);
28963 }
28964
28965 final_end_function ();
28966 }
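
/* For a rough idea of the output (illustrative, not exact): on a
   Thumb-1-only target without -fpic or -mpure-code, a thunk with a small
   positive DELTA ends up looking like

	push	{r3}
	ldr	r3, <label>
	mov	r12, r3
	adds	r0, r0, #<delta>
	pop	{r3}
	bx	r12
	.align	2
   <label>:
	.word	<function>

   with r1 instead of r0 when the function returns an aggregate.  */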
28967
28968 /* MI thunk handling for TARGET_32BIT. */
28969
28970 static void
28971 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28972 HOST_WIDE_INT vcall_offset, tree function)
28973 {
28974 const bool long_call_p = arm_is_long_call_p (function);
28975
28976 /* On ARM, this_regno is R0 or R1 depending on
28977 whether the function returns an aggregate or not.
28978 */
28979 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28980 function)
28981 ? R1_REGNUM : R0_REGNUM);
28982
28983 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28984 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28985 reload_completed = 1;
28986 emit_note (NOTE_INSN_PROLOGUE_END);
28987
28988 /* Add DELTA to THIS_RTX. */
28989 if (delta != 0)
28990 arm_split_constant (PLUS, Pmode, NULL_RTX,
28991 delta, this_rtx, this_rtx, false);
28992
28993 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28994 if (vcall_offset != 0)
28995 {
28996 /* Load *THIS_RTX. */
28997 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28998 /* Compute *THIS_RTX + VCALL_OFFSET. */
28999 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29000 false);
29001 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29002 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29003 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29004 }
29005
29006 /* Generate a tail call to the target function. */
29007 if (!TREE_USED (function))
29008 {
29009 assemble_external (function);
29010 TREE_USED (function) = 1;
29011 }
29012 rtx funexp = XEXP (DECL_RTL (function), 0);
29013 if (long_call_p)
29014 {
29015 emit_move_insn (temp, funexp);
29016 funexp = temp;
29017 }
29018 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29019 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29020 SIBLING_CALL_P (insn) = 1;
29021 emit_barrier ();
29022
29023 /* Indirect calls require a bit of fixup in PIC mode. */
29024 if (long_call_p)
29025 {
29026 split_all_insns_noflow ();
29027 arm_reorg ();
29028 }
29029
29030 insn = get_insns ();
29031 shorten_branches (insn);
29032 final_start_function (insn, file, 1);
29033 final (insn, file, 1);
29034 final_end_function ();
29035
29036 /* Stop pretending this is a post-reload pass. */
29037 reload_completed = 0;
29038 }
29039
29040 /* Output code to add DELTA to the first argument, and then jump
29041 to FUNCTION. Used for C++ multiple inheritance. */
29042
29043 static void
29044 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29045 HOST_WIDE_INT vcall_offset, tree function)
29046 {
29047 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29048
29049 assemble_start_function (thunk, fnname);
29050 if (TARGET_32BIT)
29051 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29052 else
29053 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29054 assemble_end_function (thunk, fnname);
29055 }
29056
29057 int
29058 arm_emit_vector_const (FILE *file, rtx x)
29059 {
29060 int i;
29061 const char * pattern;
29062
29063 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29064
29065 switch (GET_MODE (x))
29066 {
29067 case E_V2SImode: pattern = "%08x"; break;
29068 case E_V4HImode: pattern = "%04x"; break;
29069 case E_V8QImode: pattern = "%02x"; break;
29070 default: gcc_unreachable ();
29071 }
29072
29073 fprintf (file, "0x");
29074 for (i = CONST_VECTOR_NUNITS (x); i--;)
29075 {
29076 rtx element;
29077
29078 element = CONST_VECTOR_ELT (x, i);
29079 fprintf (file, pattern, INTVAL (element));
29080 }
29081
29082 return 1;
29083 }
29084
29085 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29086 HFmode constant pool entries are actually loaded with ldr. */
29087 void
29088 arm_emit_fp16_const (rtx c)
29089 {
29090 long bits;
29091
29092 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29093 if (WORDS_BIG_ENDIAN)
29094 assemble_zeros (2);
29095 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29096 if (!WORDS_BIG_ENDIAN)
29097 assemble_zeros (2);
29098 }
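
/* For example: on a little-endian target this emits the two bytes of the
   HFmode value followed by two bytes of zero padding, while with
   WORDS_BIG_ENDIAN the zero padding is emitted first, presumably so that
   an ldr of the whole 4-byte word leaves the value in the low 16 bits of
   the register in either case.  */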
29099
29100 const char *
29101 arm_output_load_gr (rtx *operands)
29102 {
29103 rtx reg;
29104 rtx offset;
29105 rtx wcgr;
29106 rtx sum;
29107
29108 if (!MEM_P (operands [1])
29109 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29110 || !REG_P (reg = XEXP (sum, 0))
29111 || !CONST_INT_P (offset = XEXP (sum, 1))
29112 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29113 return "wldrw%?\t%0, %1";
29114
29115 /* Fix up an out-of-range load of a GR register. */
29116 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29117 wcgr = operands[0];
29118 operands[0] = reg;
29119 output_asm_insn ("ldr%?\t%0, %1", operands);
29120
29121 operands[0] = wcgr;
29122 operands[1] = reg;
29123 output_asm_insn ("tmcr%?\t%0, %1", operands);
29124 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29125
29126 return "";
29127 }
29128
29129 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29130
29131 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29132 named arg and all anonymous args onto the stack.
29133 XXX I know the prologue shouldn't be pushing registers, but it is faster
29134 that way. */
29135
29136 static void
29137 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29138 const function_arg_info &arg,
29139 int *pretend_size,
29140 int second_time ATTRIBUTE_UNUSED)
29141 {
29142 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29143 int nregs;
29144
29145 cfun->machine->uses_anonymous_args = 1;
29146 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29147 {
29148 nregs = pcum->aapcs_ncrn;
29149 if (nregs & 1)
29150 {
29151 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29152 if (res < 0 && warn_psabi)
29153 inform (input_location, "parameter passing for argument of "
29154 "type %qT changed in GCC 7.1", arg.type);
29155 else if (res > 0)
29156 {
29157 nregs++;
29158 if (res > 1 && warn_psabi)
29159 inform (input_location,
29160 "parameter passing for argument of type "
29161 "%qT changed in GCC 9.1", arg.type);
29162 }
29163 }
29164 }
29165 else
29166 nregs = pcum->nregs;
29167
29168 if (nregs < NUM_ARG_REGS)
29169 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29170 }
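
/* Worked example (assuming the usual four core argument registers of
   four bytes each): for `int f (int a, ...)' under AAPCS the named
   argument consumes r0, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 == 12, making the prologue spill r1-r3 so that va_arg can
   walk all anonymous arguments on the stack.  */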
29171
29172 /* We can't rely on the caller doing the proper promotion when
29173 using APCS or ATPCS. */
29174
29175 static bool
29176 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29177 {
29178 return !TARGET_AAPCS_BASED;
29179 }
29180
29181 static machine_mode
29182 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29183 machine_mode mode,
29184 int *punsignedp ATTRIBUTE_UNUSED,
29185 const_tree fntype ATTRIBUTE_UNUSED,
29186 int for_return ATTRIBUTE_UNUSED)
29187 {
29188 if (GET_MODE_CLASS (mode) == MODE_INT
29189 && GET_MODE_SIZE (mode) < 4)
29190 return SImode;
29191
29192 return mode;
29193 }
29194
29195
29196 static bool
29197 arm_default_short_enums (void)
29198 {
29199 return ARM_DEFAULT_SHORT_ENUMS;
29200 }
29201
29202
29203 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29204
29205 static bool
29206 arm_align_anon_bitfield (void)
29207 {
29208 return TARGET_AAPCS_BASED;
29209 }
29210
29211
29212 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29213
29214 static tree
29215 arm_cxx_guard_type (void)
29216 {
29217 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29218 }
29219
29220
29221 /* The EABI says test the least significant bit of a guard variable. */
29222
29223 static bool
29224 arm_cxx_guard_mask_bit (void)
29225 {
29226 return TARGET_AAPCS_BASED;
29227 }
29228
29229
29230 /* The EABI specifies that all array cookies are 8 bytes long. */
29231
29232 static tree
29233 arm_get_cookie_size (tree type)
29234 {
29235 tree size;
29236
29237 if (!TARGET_AAPCS_BASED)
29238 return default_cxx_get_cookie_size (type);
29239
29240 size = build_int_cst (sizetype, 8);
29241 return size;
29242 }
29243
29244
29245 /* The EABI says that array cookies should also contain the element size. */
29246
29247 static bool
29248 arm_cookie_has_size (void)
29249 {
29250 return TARGET_AAPCS_BASED;
29251 }
29252
29253
29254 /* The EABI says constructors and destructors should return a pointer to
29255 the object constructed/destroyed. */
29256
29257 static bool
29258 arm_cxx_cdtor_returns_this (void)
29259 {
29260 return TARGET_AAPCS_BASED;
29261 }
29262
29263 /* The EABI says that an inline function may never be the key
29264 method. */
29265
29266 static bool
29267 arm_cxx_key_method_may_be_inline (void)
29268 {
29269 return !TARGET_AAPCS_BASED;
29270 }
29271
29272 static void
29273 arm_cxx_determine_class_data_visibility (tree decl)
29274 {
29275 if (!TARGET_AAPCS_BASED
29276 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29277 return;
29278
29279 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29280 is exported. However, on systems without dynamic vague linkage,
29281 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29282 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29283 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29284 else
29285 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29286 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29287 }
29288
29289 static bool
29290 arm_cxx_class_data_always_comdat (void)
29291 {
29292 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29293 vague linkage if the class has no key function. */
29294 return !TARGET_AAPCS_BASED;
29295 }
29296
29297
29298 /* The EABI says __aeabi_atexit should be used to register static
29299 destructors. */
29300
29301 static bool
29302 arm_cxx_use_aeabi_atexit (void)
29303 {
29304 return TARGET_AAPCS_BASED;
29305 }
29306
29307
29308 void
29309 arm_set_return_address (rtx source, rtx scratch)
29310 {
29311 arm_stack_offsets *offsets;
29312 HOST_WIDE_INT delta;
29313 rtx addr, mem;
29314 unsigned long saved_regs;
29315
29316 offsets = arm_get_frame_offsets ();
29317 saved_regs = offsets->saved_regs_mask;
29318
29319 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29320 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29321 else
29322 {
29323 if (frame_pointer_needed)
29324 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29325 else
29326 {
29327 /* LR will be the first saved register. */
29328 delta = offsets->outgoing_args - (offsets->frame + 4);
29329
29330
29331 if (delta >= 4096)
29332 {
29333 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29334 GEN_INT (delta & ~4095)));
29335 addr = scratch;
29336 delta &= 4095;
29337 }
29338 else
29339 addr = stack_pointer_rtx;
29340
29341 addr = plus_constant (Pmode, addr, delta);
29342 }
29343
29344 /* The store needs to be marked to prevent DSE from deleting
29345 it as dead if it is based on fp. */
29346 mem = gen_frame_mem (Pmode, addr);
29347 MEM_VOLATILE_P (mem) = true;
29348 emit_move_insn (mem, source);
29349 }
29350 }
29351
29352
29353 void
29354 thumb_set_return_address (rtx source, rtx scratch)
29355 {
29356 arm_stack_offsets *offsets;
29357 HOST_WIDE_INT delta;
29358 HOST_WIDE_INT limit;
29359 int reg;
29360 rtx addr, mem;
29361 unsigned long mask;
29362
29363 emit_use (source);
29364
29365 offsets = arm_get_frame_offsets ();
29366 mask = offsets->saved_regs_mask;
29367 if (mask & (1 << LR_REGNUM))
29368 {
29369 limit = 1024;
29370 /* Find the saved regs. */
29371 if (frame_pointer_needed)
29372 {
29373 delta = offsets->soft_frame - offsets->saved_args;
29374 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29375 if (TARGET_THUMB1)
29376 limit = 128;
29377 }
29378 else
29379 {
29380 delta = offsets->outgoing_args - offsets->saved_args;
29381 reg = SP_REGNUM;
29382 }
29383 /* Allow for the stack frame. */
29384 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29385 delta -= 16;
29386 /* The link register is always the first saved register. */
29387 delta -= 4;
29388
29389 /* Construct the address. */
29390 addr = gen_rtx_REG (SImode, reg);
29391 if (delta > limit)
29392 {
29393 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29394 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29395 addr = scratch;
29396 }
29397 else
29398 addr = plus_constant (Pmode, addr, delta);
29399
29400 /* The store needs to be marked to prevent DSE from deleting
29401 it as dead if it is based on fp. */
29402 mem = gen_frame_mem (Pmode, addr);
29403 MEM_VOLATILE_P (mem) = true;
29404 emit_move_insn (mem, source);
29405 }
29406 else
29407 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29408 }
29409
29410 /* Implements target hook vector_mode_supported_p. */
29411 bool
29412 arm_vector_mode_supported_p (machine_mode mode)
29413 {
29414 /* Neon also supports V2SImode, etc. listed in the clause below. */
29415 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29416 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29417 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29418 || mode == V8BFmode))
29419 return true;
29420
29421 if ((TARGET_NEON || TARGET_IWMMXT)
29422 && ((mode == V2SImode)
29423 || (mode == V4HImode)
29424 || (mode == V8QImode)))
29425 return true;
29426
29427 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29428 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29429 || mode == V2HAmode))
29430 return true;
29431
29432 if (TARGET_HAVE_MVE
29433 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29434 || mode == V16QImode
29435 || mode == V16BImode || mode == V8BImode || mode == V4BImode))
29436 return true;
29437
29438 if (TARGET_HAVE_MVE_FLOAT
29439 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29440 return true;
29441
29442 return false;
29443 }
29444
29445 /* Implements target hook array_mode_supported_p. */
29446
29447 static bool
29448 arm_array_mode_supported_p (machine_mode mode,
29449 unsigned HOST_WIDE_INT nelems)
29450 {
29451 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29452 for now, as the lane-swapping logic needs to be extended in the expanders.
29453 See PR target/82518. */
29454 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29455 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29456 && (nelems >= 2 && nelems <= 4))
29457 return true;
29458
29459 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29460 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29461 return true;
29462
29463 return false;
29464 }
29465
29466 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29467 registers when autovectorizing for Neon, at least until multiple vector
29468 widths are supported properly by the middle-end. */
29469
29470 static machine_mode
29471 arm_preferred_simd_mode (scalar_mode mode)
29472 {
29473 if (TARGET_NEON)
29474 switch (mode)
29475 {
29476 case E_HFmode:
29477 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29478 case E_SFmode:
29479 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29480 case E_SImode:
29481 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29482 case E_HImode:
29483 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29484 case E_QImode:
29485 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29486 case E_DImode:
29487 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29488 return V2DImode;
29489 break;
29490
29491 default:;
29492 }
29493
29494 if (TARGET_REALLY_IWMMXT)
29495 switch (mode)
29496 {
29497 case E_SImode:
29498 return V2SImode;
29499 case E_HImode:
29500 return V4HImode;
29501 case E_QImode:
29502 return V8QImode;
29503
29504 default:;
29505 }
29506
29507 if (TARGET_HAVE_MVE)
29508 switch (mode)
29509 {
29510 case E_QImode:
29511 return V16QImode;
29512 case E_HImode:
29513 return V8HImode;
29514 case E_SImode:
29515 return V4SImode;
29516
29517 default:;
29518 }
29519
29520 if (TARGET_HAVE_MVE_FLOAT)
29521 switch (mode)
29522 {
29523 case E_HFmode:
29524 return V8HFmode;
29525 case E_SFmode:
29526 return V4SFmode;
29527
29528 default:;
29529 }
29530
29531 return word_mode;
29532 }
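
/* For instance, when vectorizing SImode data with Neon enabled this
   returns V4SImode (a quadword vector) by default, or V2SImode when
   -mvectorize-with-neon-double is given; with MVE it is V4SImode, and
   word_mode (no preferred vector mode) otherwise.  */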
29533
29534 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29535
29536 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29537 using r0-r4 for function arguments, r7 for the stack frame and don't have
29538 enough left over to do doubleword arithmetic. For Thumb-2 all the
29539 potentially problematic instructions accept high registers so this is not
29540 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29541 that require many low registers. */
29542 static bool
29543 arm_class_likely_spilled_p (reg_class_t rclass)
29544 {
29545 if ((TARGET_THUMB1 && rclass == LO_REGS)
29546 || rclass == CC_REG)
29547 return true;
29548
29549 return default_class_likely_spilled_p (rclass);
29550 }
29551
29552 /* Implements target hook small_register_classes_for_mode_p. */
29553 bool
29554 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29555 {
29556 return TARGET_THUMB1;
29557 }
29558
29559 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29560 ARM insns and therefore guarantee that the shift count is modulo 256.
29561 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29562 guarantee no particular behavior for out-of-range counts. */
29563
29564 static unsigned HOST_WIDE_INT
29565 arm_shift_truncation_mask (machine_mode mode)
29566 {
29567 return mode == SImode ? 255 : 0;
29568 }
29569
29570
29571 /* Map internal gcc register numbers to DWARF2 register numbers. */
29572
29573 unsigned int
29574 arm_dbx_register_number (unsigned int regno)
29575 {
29576 if (regno < 16)
29577 return regno;
29578
29579 if (IS_VFP_REGNUM (regno))
29580 {
29581 /* See comment in arm_dwarf_register_span. */
29582 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29583 return 64 + regno - FIRST_VFP_REGNUM;
29584 else
29585 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29586 }
29587
29588 if (IS_IWMMXT_GR_REGNUM (regno))
29589 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29590
29591 if (IS_IWMMXT_REGNUM (regno))
29592 return 112 + regno - FIRST_IWMMXT_REGNUM;
29593
29594 return DWARF_FRAME_REGISTERS;
29595 }
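
/* Example mappings (see the register ranges described in
   arm_dwarf_register_span below): r0-r15 map to DWARF numbers 0-15
   unchanged, s0 maps to 64, and a double register with no
   single-precision alias such as d16 maps into the 256+ range
   (256 + 16 == 272).  */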
29596
29597 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29598 GCC models them as 64 32-bit registers, so we need to describe this to
29599 the DWARF generation code. Other registers can use the default. */
29600 static rtx
29601 arm_dwarf_register_span (rtx rtl)
29602 {
29603 machine_mode mode;
29604 unsigned regno;
29605 rtx parts[16];
29606 int nregs;
29607 int i;
29608
29609 regno = REGNO (rtl);
29610 if (!IS_VFP_REGNUM (regno))
29611 return NULL_RTX;
29612
29613 /* XXX FIXME: The EABI defines two VFP register ranges:
29614 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29615 256-287: D0-D31
29616 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29617 corresponding D register. Until GDB supports this, we shall use the
29618 legacy encodings. We also use these encodings for D0-D15 for
29619 compatibility with older debuggers. */
29620 mode = GET_MODE (rtl);
29621 if (GET_MODE_SIZE (mode) < 8)
29622 return NULL_RTX;
29623
29624 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29625 {
29626 nregs = GET_MODE_SIZE (mode) / 4;
29627 for (i = 0; i < nregs; i += 2)
29628 if (TARGET_BIG_END)
29629 {
29630 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29631 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29632 }
29633 else
29634 {
29635 parts[i] = gen_rtx_REG (SImode, regno + i);
29636 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29637 }
29638 }
29639 else
29640 {
29641 nregs = GET_MODE_SIZE (mode) / 8;
29642 for (i = 0; i < nregs; i++)
29643 parts[i] = gen_rtx_REG (DImode, regno + i);
29644 }
29645
29646 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29647 }
29648
29649 #if ARM_UNWIND_INFO
29650 /* Emit unwind directives for a store-multiple instruction or stack pointer
29651 push during alignment.
29652 These should only ever be generated by the function prologue code, so
29653 expect them to have a particular form.
29654 The store-multiple instruction sometimes pushes pc as the last register,
29655 although it should not be tracked into unwind information, or for -Os
29656 sometimes pushes some dummy registers before the first register that needs
29657 to be tracked in unwind information; such dummy registers are there just
29658 to avoid separate stack adjustment, and will not be restored in the
29659 epilogue. */
29660
29661 static void
29662 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29663 {
29664 int i;
29665 HOST_WIDE_INT offset;
29666 HOST_WIDE_INT nregs;
29667 int reg_size;
29668 unsigned reg;
29669 unsigned lastreg;
29670 unsigned padfirst = 0, padlast = 0;
29671 rtx e;
29672
29673 e = XVECEXP (p, 0, 0);
29674 gcc_assert (GET_CODE (e) == SET);
29675
29676 /* First insn will adjust the stack pointer. */
29677 gcc_assert (GET_CODE (e) == SET
29678 && REG_P (SET_DEST (e))
29679 && REGNO (SET_DEST (e)) == SP_REGNUM
29680 && GET_CODE (SET_SRC (e)) == PLUS);
29681
29682 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29683 nregs = XVECLEN (p, 0) - 1;
29684 gcc_assert (nregs);
29685
29686 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29687 if (reg < 16)
29688 {
29689 /* For -Os dummy registers can be pushed at the beginning to
29690 avoid separate stack pointer adjustment. */
29691 e = XVECEXP (p, 0, 1);
29692 e = XEXP (SET_DEST (e), 0);
29693 if (GET_CODE (e) == PLUS)
29694 padfirst = INTVAL (XEXP (e, 1));
29695 gcc_assert (padfirst == 0 || optimize_size);
29696 /* The function prologue may also push pc, but not annotate it as it is
29697 never restored. We turn this into a stack pointer adjustment. */
29698 e = XVECEXP (p, 0, nregs);
29699 e = XEXP (SET_DEST (e), 0);
29700 if (GET_CODE (e) == PLUS)
29701 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29702 else
29703 padlast = offset - 4;
29704 gcc_assert (padlast == 0 || padlast == 4);
29705 if (padlast == 4)
29706 fprintf (out_file, "\t.pad #4\n");
29707 reg_size = 4;
29708 fprintf (out_file, "\t.save {");
29709 }
29710 else if (IS_VFP_REGNUM (reg))
29711 {
29712 reg_size = 8;
29713 fprintf (out_file, "\t.vsave {");
29714 }
29715 else
29716 /* Unknown register type. */
29717 gcc_unreachable ();
29718
29719 /* If the stack increment doesn't match the size of the saved registers,
29720 something has gone horribly wrong. */
29721 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29722
29723 offset = padfirst;
29724 lastreg = 0;
29725 /* The remaining insns will describe the stores. */
29726 for (i = 1; i <= nregs; i++)
29727 {
29728 /* Expect (set (mem <addr>) (reg)).
29729 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29730 e = XVECEXP (p, 0, i);
29731 gcc_assert (GET_CODE (e) == SET
29732 && MEM_P (SET_DEST (e))
29733 && REG_P (SET_SRC (e)));
29734
29735 reg = REGNO (SET_SRC (e));
29736 gcc_assert (reg >= lastreg);
29737
29738 if (i != 1)
29739 fprintf (out_file, ", ");
29740 /* We can't use %r for vfp because we need to use the
29741 double precision register names. */
29742 if (IS_VFP_REGNUM (reg))
29743 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29744 else
29745 asm_fprintf (out_file, "%r", reg);
29746
29747 if (flag_checking)
29748 {
29749 /* Check that the addresses are consecutive. */
29750 e = XEXP (SET_DEST (e), 0);
29751 if (GET_CODE (e) == PLUS)
29752 gcc_assert (REG_P (XEXP (e, 0))
29753 && REGNO (XEXP (e, 0)) == SP_REGNUM
29754 && CONST_INT_P (XEXP (e, 1))
29755 && offset == INTVAL (XEXP (e, 1)));
29756 else
29757 gcc_assert (i == 1
29758 && REG_P (e)
29759 && REGNO (e) == SP_REGNUM);
29760 offset += reg_size;
29761 }
29762 }
29763 fprintf (out_file, "}\n");
29764 if (padfirst)
29765 fprintf (out_file, "\t.pad #%d\n", padfirst);
29766 }
29767
29768 /* Emit unwind directives for a SET. */
29769
29770 static void
29771 arm_unwind_emit_set (FILE * out_file, rtx p)
29772 {
29773 rtx e0;
29774 rtx e1;
29775 unsigned reg;
29776
29777 e0 = XEXP (p, 0);
29778 e1 = XEXP (p, 1);
29779 switch (GET_CODE (e0))
29780 {
29781 case MEM:
29782 /* Pushing a single register. */
29783 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29784 || !REG_P (XEXP (XEXP (e0, 0), 0))
29785 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29786 abort ();
29787
29788 asm_fprintf (out_file, "\t.save ");
29789 if (IS_VFP_REGNUM (REGNO (e1)))
29790 asm_fprintf(out_file, "{d%d}\n",
29791 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29792 else
29793 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
29794 break;
29795
29796 case REG:
29797 if (REGNO (e0) == SP_REGNUM)
29798 {
29799 /* A stack increment. */
29800 if (GET_CODE (e1) != PLUS
29801 || !REG_P (XEXP (e1, 0))
29802 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29803 || !CONST_INT_P (XEXP (e1, 1)))
29804 abort ();
29805
29806 asm_fprintf (out_file, "\t.pad #%wd\n",
29807 -INTVAL (XEXP (e1, 1)));
29808 }
29809 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29810 {
29811 HOST_WIDE_INT offset;
29812
29813 if (GET_CODE (e1) == PLUS)
29814 {
29815 if (!REG_P (XEXP (e1, 0))
29816 || !CONST_INT_P (XEXP (e1, 1)))
29817 abort ();
29818 reg = REGNO (XEXP (e1, 0));
29819 offset = INTVAL (XEXP (e1, 1));
29820 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
29821 HARD_FRAME_POINTER_REGNUM, reg,
29822 offset);
29823 }
29824 else if (REG_P (e1))
29825 {
29826 reg = REGNO (e1);
29827 asm_fprintf (out_file, "\t.setfp %r, %r\n",
29828 HARD_FRAME_POINTER_REGNUM, reg);
29829 }
29830 else
29831 abort ();
29832 }
29833 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29834 {
29835 /* Move from sp to reg. */
29836 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
29837 }
29838 else if (GET_CODE (e1) == PLUS
29839 && REG_P (XEXP (e1, 0))
29840 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29841 && CONST_INT_P (XEXP (e1, 1)))
29842 {
29843 /* Set reg to offset from sp. */
29844 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
29845 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29846 }
29847 else
29848 abort ();
29849 break;
29850
29851 default:
29852 abort ();
29853 }
29854 }
29855
29856
29857 /* Emit unwind directives for the given insn. */
29858
29859 static void
29860 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
29861 {
29862 rtx note, pat;
29863 bool handled_one = false;
29864
29865 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29866 return;
29867
29868 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29869 && (TREE_NOTHROW (current_function_decl)
29870 || crtl->all_throwers_are_sibcalls))
29871 return;
29872
29873 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29874 return;
29875
29876 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29877 {
29878 switch (REG_NOTE_KIND (note))
29879 {
29880 case REG_FRAME_RELATED_EXPR:
29881 pat = XEXP (note, 0);
29882 goto found;
29883
29884 case REG_CFA_REGISTER:
29885 pat = XEXP (note, 0);
29886 if (pat == NULL)
29887 {
29888 pat = PATTERN (insn);
29889 if (GET_CODE (pat) == PARALLEL)
29890 pat = XVECEXP (pat, 0, 0);
29891 }
29892
29893 /* Only emitted for IS_STACKALIGN re-alignment. */
29894 {
29895 rtx dest, src;
29896 unsigned reg;
29897
29898 src = SET_SRC (pat);
29899 dest = SET_DEST (pat);
29900
29901 gcc_assert (src == stack_pointer_rtx);
29902 reg = REGNO (dest);
29903 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29904 reg + 0x90, reg);
29905 }
29906 handled_one = true;
29907 break;
29908
29909 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29910 to get correct dwarf information for shrink-wrap. We should not
29911 emit unwind information for it because these are used either for
29912 pretend arguments or notes to adjust sp and restore registers from
29913 stack. */
29914 case REG_CFA_DEF_CFA:
29915 case REG_CFA_ADJUST_CFA:
29916 case REG_CFA_RESTORE:
29917 return;
29918
29919 case REG_CFA_EXPRESSION:
29920 case REG_CFA_OFFSET:
29921 /* ??? Only handling here what we actually emit. */
29922 gcc_unreachable ();
29923
29924 default:
29925 break;
29926 }
29927 }
29928 if (handled_one)
29929 return;
29930 pat = PATTERN (insn);
29931 found:
29932
29933 switch (GET_CODE (pat))
29934 {
29935 case SET:
29936 arm_unwind_emit_set (out_file, pat);
29937 break;
29938
29939 case SEQUENCE:
29940 /* Store multiple. */
29941 arm_unwind_emit_sequence (out_file, pat);
29942 break;
29943
29944 default:
29945 abort();
29946 }
29947 }
29948
29949
29950 /* Output a reference from a function exception table to the type_info
29951 object X. The EABI specifies that the symbol should be relocated by
29952 an R_ARM_TARGET2 relocation. */
29953
29954 static bool
29955 arm_output_ttype (rtx x)
29956 {
29957 fputs ("\t.word\t", asm_out_file);
29958 output_addr_const (asm_out_file, x);
29959 /* Use special relocations for symbol references. */
29960 if (!CONST_INT_P (x))
29961 fputs ("(TARGET2)", asm_out_file);
29962 fputc ('\n', asm_out_file);
29963
29964 return TRUE;
29965 }
29966
29967 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29968
29969 static void
29970 arm_asm_emit_except_personality (rtx personality)
29971 {
29972 fputs ("\t.personality\t", asm_out_file);
29973 output_addr_const (asm_out_file, personality);
29974 fputc ('\n', asm_out_file);
29975 }
29976 #endif /* ARM_UNWIND_INFO */
29977
29978 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29979
29980 static void
29981 arm_asm_init_sections (void)
29982 {
29983 #if ARM_UNWIND_INFO
29984 exception_section = get_unnamed_section (0, output_section_asm_op,
29985 "\t.handlerdata");
29986 #endif /* ARM_UNWIND_INFO */
29987
29988 #ifdef OBJECT_FORMAT_ELF
29989 if (target_pure_code)
29990 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29991 #endif
29992 }
29993
29994 /* Output unwind directives for the start/end of a function. */
29995
29996 void
29997 arm_output_fn_unwind (FILE * f, bool prologue)
29998 {
29999 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30000 return;
30001
30002 if (prologue)
30003 fputs ("\t.fnstart\n", f);
30004 else
30005 {
30006 /* If this function will never be unwound, then mark it as such.
30007 The same condition is used in arm_unwind_emit to suppress
30008 the frame annotations. */
30009 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30010 && (TREE_NOTHROW (current_function_decl)
30011 || crtl->all_throwers_are_sibcalls))
30012 fputs("\t.cantunwind\n", f);
30013
30014 fputs ("\t.fnend\n", f);
30015 }
30016 }
30017
30018 static bool
30019 arm_emit_tls_decoration (FILE *fp, rtx x)
30020 {
30021 enum tls_reloc reloc;
30022 rtx val;
30023
30024 val = XVECEXP (x, 0, 0);
30025 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30026
30027 output_addr_const (fp, val);
30028
30029 switch (reloc)
30030 {
30031 case TLS_GD32:
30032 fputs ("(tlsgd)", fp);
30033 break;
30034 case TLS_GD32_FDPIC:
30035 fputs ("(tlsgd_fdpic)", fp);
30036 break;
30037 case TLS_LDM32:
30038 fputs ("(tlsldm)", fp);
30039 break;
30040 case TLS_LDM32_FDPIC:
30041 fputs ("(tlsldm_fdpic)", fp);
30042 break;
30043 case TLS_LDO32:
30044 fputs ("(tlsldo)", fp);
30045 break;
30046 case TLS_IE32:
30047 fputs ("(gottpoff)", fp);
30048 break;
30049 case TLS_IE32_FDPIC:
30050 fputs ("(gottpoff_fdpic)", fp);
30051 break;
30052 case TLS_LE32:
30053 fputs ("(tpoff)", fp);
30054 break;
30055 case TLS_DESCSEQ:
30056 fputs ("(tlsdesc)", fp);
30057 break;
30058 default:
30059 gcc_unreachable ();
30060 }
30061
30062 switch (reloc)
30063 {
30064 case TLS_GD32:
30065 case TLS_LDM32:
30066 case TLS_IE32:
30067 case TLS_DESCSEQ:
30068 fputs (" + (. - ", fp);
30069 output_addr_const (fp, XVECEXP (x, 0, 2));
30070 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30071 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30072 output_addr_const (fp, XVECEXP (x, 0, 3));
30073 fputc (')', fp);
30074 break;
30075 default:
30076 break;
30077 }
30078
30079 return TRUE;
30080 }
30081
30082 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30083
30084 static void
30085 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30086 {
30087 gcc_assert (size == 4);
30088 fputs ("\t.word\t", file);
30089 output_addr_const (file, x);
30090 fputs ("(tlsldo)", file);
30091 }
30092
30093 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30094
30095 static bool
30096 arm_output_addr_const_extra (FILE *fp, rtx x)
30097 {
30098 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30099 return arm_emit_tls_decoration (fp, x);
30100 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30101 {
30102 char label[256];
30103 int labelno = INTVAL (XVECEXP (x, 0, 0));
30104
30105 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30106 assemble_name_raw (fp, label);
30107
30108 return TRUE;
30109 }
30110 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30111 {
30112 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30113 if (GOT_PCREL)
30114 fputs ("+.", fp);
30115 fputs ("-(", fp);
30116 output_addr_const (fp, XVECEXP (x, 0, 0));
30117 fputc (')', fp);
30118 return TRUE;
30119 }
30120 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30121 {
30122 output_addr_const (fp, XVECEXP (x, 0, 0));
30123 if (GOT_PCREL)
30124 fputs ("+.", fp);
30125 fputs ("-(", fp);
30126 output_addr_const (fp, XVECEXP (x, 0, 1));
30127 fputc (')', fp);
30128 return TRUE;
30129 }
30130 else if (GET_CODE (x) == CONST_VECTOR)
30131 return arm_emit_vector_const (fp, x);
30132
30133 return FALSE;
30134 }
30135
30136 /* Output assembly for a shift instruction.
30137 SET_FLAGS determines how the instruction modifies the condition codes.
30138 0 - Do not set condition codes.
30139 1 - Set condition codes.
30140 2 - Use smallest instruction. */
30141 const char *
30142 arm_output_shift(rtx * operands, int set_flags)
30143 {
30144 char pattern[100];
30145 static const char flag_chars[3] = {'?', '.', '!'};
30146 const char *shift;
30147 HOST_WIDE_INT val;
30148 char c;
30149
30150 c = flag_chars[set_flags];
30151 shift = shift_op(operands[3], &val);
30152 if (shift)
30153 {
30154 if (val != -1)
30155 operands[2] = GEN_INT(val);
30156 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30157 }
30158 else
30159 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30160
30161 output_asm_insn (pattern, operands);
30162 return "";
30163 }
30164
30165 /* Output assembly for a WMMX immediate shift instruction. */
30166 const char *
30167 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30168 {
30169 int shift = INTVAL (operands[2]);
30170 char templ[50];
30171 machine_mode opmode = GET_MODE (operands[0]);
30172
30173 gcc_assert (shift >= 0);
30174
30175 /* Handle a shift value greater than the maximum the qualifier allows:
30176 63 for the D qualifier, 31 for W and 15 for H. */
30177 if (((opmode == V4HImode) && (shift > 15))
30178 || ((opmode == V2SImode) && (shift > 31))
30179 || ((opmode == DImode) && (shift > 63)))
30180 {
30181 if (wror_or_wsra)
30182 {
30183 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30184 output_asm_insn (templ, operands);
30185 if (opmode == DImode)
30186 {
30187 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30188 output_asm_insn (templ, operands);
30189 }
30190 }
30191 else
30192 {
30193 /* The destination register will contain all zeros. */
30194 sprintf (templ, "wzero\t%%0");
30195 output_asm_insn (templ, operands);
30196 }
30197 return "";
30198 }
30199
30200 if ((opmode == DImode) && (shift > 32))
30201 {
30202 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30203 output_asm_insn (templ, operands);
30204 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30205 output_asm_insn (templ, operands);
30206 }
30207 else
30208 {
30209 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30210 output_asm_insn (templ, operands);
30211 }
30212 return "";
30213 }
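
/* For example (illustrative): a DImode shift by an immediate of 40 is
   split by the code above into two instructions, one shifting by #32 and
   a second shifting the intermediate result by #8.  */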
30214
30215 /* Output assembly for a WMMX tinsr instruction. */
30216 const char *
30217 arm_output_iwmmxt_tinsr (rtx *operands)
30218 {
30219 int mask = INTVAL (operands[3]);
30220 int i;
30221 char templ[50];
30222 int units = mode_nunits[GET_MODE (operands[0])];
30223 gcc_assert ((mask & (mask - 1)) == 0);
30224 for (i = 0; i < units; ++i)
30225 {
30226 if ((mask & 0x01) == 1)
30227 {
30228 break;
30229 }
30230 mask >>= 1;
30231 }
30232 gcc_assert (i < units);
30233 {
30234 switch (GET_MODE (operands[0]))
30235 {
30236 case E_V8QImode:
30237 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30238 break;
30239 case E_V4HImode:
30240 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30241 break;
30242 case E_V2SImode:
30243 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30244 break;
30245 default:
30246 gcc_unreachable ();
30247 break;
30248 }
30249 output_asm_insn (templ, operands);
30250 }
30251 return "";
30252 }
30253
30254 /* Output a Thumb-1 casesi dispatch sequence. */
30255 const char *
30256 thumb1_output_casesi (rtx *operands)
30257 {
30258 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30259
30260 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30261
30262 switch (GET_MODE(diff_vec))
30263 {
30264 case E_QImode:
30265 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30266 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30267 case E_HImode:
30268 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30269 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30270 case E_SImode:
30271 return "bl\t%___gnu_thumb1_case_si";
30272 default:
30273 gcc_unreachable ();
30274 }
30275 }
30276
30277 /* Output a Thumb-2 casesi instruction. */
30278 const char *
30279 thumb2_output_casesi (rtx *operands)
30280 {
30281 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30282
30283 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30284
30285 output_asm_insn ("cmp\t%0, %1", operands);
30286 output_asm_insn ("bhi\t%l3", operands);
30287 switch (GET_MODE(diff_vec))
30288 {
30289 case E_QImode:
30290 return "tbb\t[%|pc, %0]";
30291 case E_HImode:
30292 return "tbh\t[%|pc, %0, lsl #1]";
30293 case E_SImode:
30294 if (flag_pic)
30295 {
30296 output_asm_insn ("adr\t%4, %l2", operands);
30297 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30298 output_asm_insn ("add\t%4, %4, %5", operands);
30299 return "bx\t%4";
30300 }
30301 else
30302 {
30303 output_asm_insn ("adr\t%4, %l2", operands);
30304 return "ldr\t%|pc, [%4, %0, lsl #2]";
30305 }
30306 default:
30307 gcc_unreachable ();
30308 }
30309 }
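
/* Illustrative output for a dispatch table with HImode offsets:

	cmp	<index>, <bound>
	bhi	<default-label>
	tbh	[pc, <index>, lsl #1]

   The QImode case uses tbb instead, and the SImode case falls back to an
   adr/ldr sequence (plus an add and bx when -fpic is in effect).  */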
30310
30311 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30312 per-core tuning structs. */
30313 static int
30314 arm_issue_rate (void)
30315 {
30316 return current_tune->issue_rate;
30317 }
30318
30319 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30320 static int
30321 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30322 {
30323 if (DEBUG_INSN_P (insn))
30324 return more;
30325
30326 rtx_code code = GET_CODE (PATTERN (insn));
30327 if (code == USE || code == CLOBBER)
30328 return more;
30329
30330 if (get_attr_type (insn) == TYPE_NO_INSN)
30331 return more;
30332
30333 return more - 1;
30334 }
30335
30336 /* Return how many instructions the scheduler should look ahead to choose
30337 the best one. */
30338 static int
30339 arm_first_cycle_multipass_dfa_lookahead (void)
30340 {
30341 int issue_rate = arm_issue_rate ();
30342
30343 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30344 }
30345
30346 /* Enable modeling of L2 auto-prefetcher. */
30347 static int
30348 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30349 {
30350 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30351 }
30352
30353 const char *
30354 arm_mangle_type (const_tree type)
30355 {
30356 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30357 has to be mangled as if it is in the "std" namespace. */
30358 if (TARGET_AAPCS_BASED
30359 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30360 return "St9__va_list";
30361
30362 /* Half-precision floating point types. */
30363 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30364 {
30365 if (TYPE_MODE (type) == BFmode)
30366 return "u6__bf16";
30367 else
30368 return "Dh";
30369 }
30370
30371 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30372 builtin type. */
30373 if (TYPE_NAME (type) != NULL)
30374 return arm_mangle_builtin_type (type);
30375
30376 /* Use the default mangling. */
30377 return NULL;
30378 }
30379
30380 /* Order of allocation of core registers for Thumb: this allocation is
30381 written over the corresponding initial entries of the array
30382 initialized with REG_ALLOC_ORDER. We allocate all low registers
30383 first. Saving and restoring a low register is usually cheaper than
30384 using a call-clobbered high register. */
30385
30386 static const int thumb_core_reg_alloc_order[] =
30387 {
30388 3, 2, 1, 0, 4, 5, 6, 7,
30389 12, 14, 8, 9, 10, 11
30390 };
30391
30392 /* Adjust register allocation order when compiling for Thumb. */
30393
30394 void
30395 arm_order_regs_for_local_alloc (void)
30396 {
30397 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30398 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30399 if (TARGET_THUMB)
30400 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30401 sizeof (thumb_core_reg_alloc_order));
30402 }
30403
30404 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30405
30406 bool
30407 arm_frame_pointer_required (void)
30408 {
30409 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30410 return true;
30411
30412 /* If the function receives nonlocal gotos, it needs to save the frame
30413 pointer in the nonlocal_goto_save_area object. */
30414 if (cfun->has_nonlocal_label)
30415 return true;
30416
30417 /* The frame pointer is required for non-leaf APCS frames. */
30418 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30419 return true;
30420
30421 /* If we are probing the stack in the prologue, we will have a faulting
30422 instruction prior to the stack adjustment and this requires a frame
30423 pointer if we want to catch the exception using the EABI unwinder. */
30424 if (!IS_INTERRUPT (arm_current_func_type ())
30425 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30426 || flag_stack_clash_protection)
30427 && arm_except_unwind_info (&global_options) == UI_TARGET
30428 && cfun->can_throw_non_call_exceptions)
30429 {
30430 HOST_WIDE_INT size = get_frame_size ();
30431
30432 /* That's irrelevant if there is no stack adjustment. */
30433 if (size <= 0)
30434 return false;
30435
30436 /* That's relevant only if there is a stack probe. */
30437 if (crtl->is_leaf && !cfun->calls_alloca)
30438 {
30439 /* We don't have the final size of the frame so adjust. */
30440 size += 32 * UNITS_PER_WORD;
30441 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30442 return true;
30443 }
30444 else
30445 return true;
30446 }
30447
30448 return false;
30449 }
30450
30451 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30452 All modes except THUMB1 have conditional execution.
30453 If we have conditional arithmetic, return false before reload to
30454 enable some ifcvt transformations. */
30455 static bool
30456 arm_have_conditional_execution (void)
30457 {
30458 bool has_cond_exec, enable_ifcvt_trans;
30459
30460 /* Only THUMB1 cannot support conditional execution. */
30461 has_cond_exec = !TARGET_THUMB1;
30462
30463 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30464 before reload. */
30465 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30466
30467 return has_cond_exec && !enable_ifcvt_trans;
30468 }
30469
30470 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30471 static HOST_WIDE_INT
30472 arm_vector_alignment (const_tree type)
30473 {
30474 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30475
30476 if (TARGET_AAPCS_BASED)
30477 align = MIN (align, 64);
30478
30479 return align;
30480 }
30481
30482 static unsigned int
30483 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30484 {
30485 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30486 {
30487 modes->safe_push (V16QImode);
30488 modes->safe_push (V8QImode);
30489 }
30490 return 0;
30491 }
30492
30493 static bool
30494 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30495 {
30496 /* Vectors which aren't in packed structures will not be less aligned than
30497 the natural alignment of their element type, so this is safe. */
30498 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30499 return !is_packed;
30500
30501 return default_builtin_vector_alignment_reachable (type, is_packed);
30502 }
30503
30504 static bool
30505 arm_builtin_support_vector_misalignment (machine_mode mode,
30506 const_tree type, int misalignment,
30507 bool is_packed)
30508 {
30509 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30510 {
30511 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30512
30513 if (is_packed)
30514 return align == 1;
30515
30516 /* If the misalignment is unknown, we should be able to handle the access
30517 so long as it is not to a member of a packed data structure. */
30518 if (misalignment == -1)
30519 return true;
30520
30521 /* Return true if the misalignment is a multiple of the natural alignment
30522 of the vector's element type. This is probably always going to be
30523 true in practice, since we've already established that this isn't a
30524 packed access. */
30525 return ((misalignment % align) == 0);
30526 }
30527
30528 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30529 is_packed);
30530 }
30531
30532 static void
30533 arm_conditional_register_usage (void)
30534 {
30535 int regno;
30536
30537 if (TARGET_THUMB1 && optimize_size)
30538 {
30539 /* When optimizing for size on Thumb-1, it's better not
30540 to use the HI regs, because of the overhead of
30541 stacking them. */
30542 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30543 fixed_regs[regno] = call_used_regs[regno] = 1;
30544 }
30545
30546 /* The link register can be clobbered by any branch insn,
30547 but we have no way to track that at present, so mark
30548 it as unavailable. */
30549 if (TARGET_THUMB1)
30550 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30551
30552 if (TARGET_32BIT && TARGET_VFP_BASE)
30553 {
30554 /* VFPv3 registers are disabled when earlier VFP
30555 versions are selected due to the definition of
30556 LAST_VFP_REGNUM. */
30557 for (regno = FIRST_VFP_REGNUM;
30558 regno <= LAST_VFP_REGNUM; ++ regno)
30559 {
30560 fixed_regs[regno] = 0;
30561 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30562 || regno >= FIRST_VFP_REGNUM + 32;
30563 }
30564 if (TARGET_HAVE_MVE)
30565 fixed_regs[VPR_REGNUM] = 0;
30566 }
30567
30568 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30569 {
30570 regno = FIRST_IWMMXT_GR_REGNUM;
30571 /* The 2002/10/09 revision of the XScale ABI has wCG0
30572 and wCG1 as call-preserved registers. The 2002/11/21
30573 revision changed this so that all wCG registers are
30574 scratch registers. */
30575 for (regno = FIRST_IWMMXT_GR_REGNUM;
30576 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30577 fixed_regs[regno] = 0;
30578 /* The XScale ABI has wR0 - wR9 as scratch registers,
30579 the rest as call-preserved registers. */
30580 for (regno = FIRST_IWMMXT_REGNUM;
30581 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30582 {
30583 fixed_regs[regno] = 0;
30584 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30585 }
30586 }
30587
30588 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30589 {
30590 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30591 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30592 }
30593 else if (TARGET_APCS_STACK)
30594 {
30595 fixed_regs[10] = 1;
30596 call_used_regs[10] = 1;
30597 }
30598 /* -mcaller-super-interworking reserves r11 for calls to
30599 _interwork_r11_call_via_rN(). Making the register global
30600 is an easy way of ensuring that it remains valid for all
30601 calls. */
30602 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30603 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30604 {
30605 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30606 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30607 if (TARGET_CALLER_INTERWORKING)
30608 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30609 }
30610
30611 /* The Q and GE bits are only accessed via special ACLE patterns. */
30612 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30613 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30614
30615 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30616 }
30617
30618 static reg_class_t
30619 arm_preferred_rename_class (reg_class_t rclass)
30620 {
30621 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30622 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
30623 and code size can be reduced. */
30624 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30625 return LO_REGS;
30626 else
30627 return NO_REGS;
30628 }
30629
30630 /* Compute the attribute "length" of insn "*push_multi".
30631 So this function MUST be kept in sync with that insn pattern. */
30632 int
30633 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30634 {
30635 int i, regno, hi_reg;
30636 int num_saves = XVECLEN (parallel_op, 0);
30637
30638 /* ARM mode. */
30639 if (TARGET_ARM)
30640 return 4;
30641 /* Thumb1 mode. */
30642 if (TARGET_THUMB1)
30643 return 2;
30644
30645 /* Thumb2 mode. */
30646 regno = REGNO (first_op);
30647 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30648 list fits in the 8-bit register-list field. Normally this means all registers
30649 in the list must be LO_REGS, that is (R0-R7). If any HI_REGS are used, then we
30650 must use 32-bit encodings. There is one exception: for PUSH, LR (although in
30651 HI_REGS) can be used with the 16-bit encoding. */
30652 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30653 for (i = 1; i < num_saves && !hi_reg; i++)
30654 {
30655 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30656 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30657 }
30658
30659 if (!hi_reg)
30660 return 2;
30661 return 4;
30662 }
30663
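/* Illustrative examples (not exhaustive) of the rule above for Thumb-2:
   "push {r0-r7}" and "push {r4, lr}" can use the 16-bit encoding, so the
   length is 2; "push {r4, r8}" needs the 32-bit encoding, so the length
   is 4.  */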
30664 /* Compute the attribute "length" of an insn. Currently, this function is used
30665 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30666 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30667 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30668 true if OPERANDS contains an insn which explicitly updates the base register. */
30669
30670 int
30671 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30672 {
30673 /* ARM mode. */
30674 if (TARGET_ARM)
30675 return 4;
30676 /* Thumb1 mode. */
30677 if (TARGET_THUMB1)
30678 return 2;
30679
30680 rtx parallel_op = operands[0];
30681 /* Start from the index of the last element in the PARALLEL. */
30682 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30683 /* Initialize REGNO to the number of the base register. */
30684 unsigned regno = REGNO (operands[1]);
30685 /* Skip the return and write-back patterns.
30686 Only the register pop patterns are needed for the analysis below. */
30687 unsigned first_indx = 0;
30688 first_indx += return_pc ? 1 : 0;
30689 first_indx += write_back_p ? 1 : 0;
30690
30691 /* A pop operation can be done through LDM or POP. If the base register is SP
30692 and write-back is used, then the LDM is an alias of POP. */
30693 bool pop_p = (regno == SP_REGNUM && write_back_p);
30694 bool ldm_p = !pop_p;
30695
30696 /* Check base register for LDM. */
30697 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30698 return 4;
30699
30700 /* Check each register in the list. */
30701 for (; indx >= first_indx; indx--)
30702 {
30703 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30704 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30705 comment in arm_attr_length_push_multi. */
30706 if (REGNO_REG_CLASS (regno) == HI_REGS
30707 && (regno != PC_REGNUM || ldm_p))
30708 return 4;
30709 }
30710
30711 return 2;
30712 }
30713
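/* Illustrative examples of the rule above for Thumb-2: "pop {r4-r7, pc}"
   (base register SP with write-back) can use the 16-bit encoding, so the
   length is 2; a pop list containing r8, or an LDM whose base register is
   a high register, needs the 32-bit encoding, so the length is 4.  */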
30714 /* Compute the number of instructions emitted by output_move_double. */
30715 int
30716 arm_count_output_move_double_insns (rtx *operands)
30717 {
30718 int count;
30719 rtx ops[2];
30720 /* output_move_double may modify the operands array, so call it
30721 here on a copy of the array. */
30722 ops[0] = operands[0];
30723 ops[1] = operands[1];
30724 output_move_double (ops, false, &count);
30725 return count;
30726 }
30727
30728 /* Same as above, but operands are a register/memory pair in SImode.
30729 Assumes operands has the base register in position 0 and memory in position
30730 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30731 int
30732 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30733 {
30734 int count;
30735 rtx ops[2];
30736 int regnum, memnum;
30737 if (load)
30738 regnum = 0, memnum = 1;
30739 else
30740 regnum = 1, memnum = 0;
30741 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30742 ops[memnum] = adjust_address (operands[2], DImode, 0);
30743 output_move_double (ops, false, &count);
30744 return count;
30745 }
30746
30747
30748 int
30749 vfp3_const_double_for_fract_bits (rtx operand)
30750 {
30751 REAL_VALUE_TYPE r0;
30752
30753 if (!CONST_DOUBLE_P (operand))
30754 return 0;
30755
30756 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30757 if (exact_real_inverse (DFmode, &r0)
30758 && !REAL_VALUE_NEGATIVE (r0))
30759 {
30760 if (exact_real_truncate (DFmode, &r0))
30761 {
30762 HOST_WIDE_INT value = real_to_integer (&r0);
30763 value = value & 0xffffffff;
30764 if ((value != 0) && ( (value & (value - 1)) == 0))
30765 {
30766 int ret = exact_log2 (value);
30767 gcc_assert (IN_RANGE (ret, 0, 31));
30768 return ret;
30769 }
30770 }
30771 }
30772 return 0;
30773 }
30774
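/* Worked example (illustrative only): for the constant 0.25 the exact
   inverse is 4.0 == 2^2, so the function returns 2, i.e. the operand can
   be treated as a fixed-point value with 2 fraction bits.  */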
30775 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30776 log2 is in [1, 32], return that log2. Otherwise return -1.
30777 This is used in the patterns for vcvt.s32.f32 floating-point to
30778 fixed-point conversions. */
30779
30780 int
30781 vfp3_const_double_for_bits (rtx x)
30782 {
30783 const REAL_VALUE_TYPE *r;
30784
30785 if (!CONST_DOUBLE_P (x))
30786 return -1;
30787
30788 r = CONST_DOUBLE_REAL_VALUE (x);
30789
30790 if (REAL_VALUE_NEGATIVE (*r)
30791 || REAL_VALUE_ISNAN (*r)
30792 || REAL_VALUE_ISINF (*r)
30793 || !real_isinteger (r, SFmode))
30794 return -1;
30795
30796 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30797
30798 /* The exact_log2 above will have returned -1 if this is
30799 not an exact log2. */
30800 if (!IN_RANGE (hwint, 1, 32))
30801 return -1;
30802
30803 return hwint;
30804 }
30805
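/* Worked examples (illustrative only): 65536.0 == 2^16 yields 16, while
   3.0 is not a power of 2 and 1.0 has log2 0 (outside [1, 32]), so both
   of those yield -1.  */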
30806 \f
30807 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30808
30809 static void
30810 arm_pre_atomic_barrier (enum memmodel model)
30811 {
30812 if (need_atomic_barrier_p (model, true))
30813 emit_insn (gen_memory_barrier ());
30814 }
30815
30816 static void
30817 arm_post_atomic_barrier (enum memmodel model)
30818 {
30819 if (need_atomic_barrier_p (model, false))
30820 emit_insn (gen_memory_barrier ());
30821 }
30822
30823 /* Emit the load-exclusive and store-exclusive instructions.
30824 Use acquire and release versions if necessary. */
30825
30826 static void
30827 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30828 {
30829 rtx (*gen) (rtx, rtx);
30830
30831 if (acq)
30832 {
30833 switch (mode)
30834 {
30835 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30836 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30837 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30838 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30839 default:
30840 gcc_unreachable ();
30841 }
30842 }
30843 else
30844 {
30845 switch (mode)
30846 {
30847 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30848 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30849 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30850 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30851 default:
30852 gcc_unreachable ();
30853 }
30854 }
30855
30856 emit_insn (gen (rval, mem));
30857 }
30858
30859 static void
30860 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30861 rtx mem, bool rel)
30862 {
30863 rtx (*gen) (rtx, rtx, rtx);
30864
30865 if (rel)
30866 {
30867 switch (mode)
30868 {
30869 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30870 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30871 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30872 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30873 default:
30874 gcc_unreachable ();
30875 }
30876 }
30877 else
30878 {
30879 switch (mode)
30880 {
30881 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30882 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30883 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30884 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30885 default:
30886 gcc_unreachable ();
30887 }
30888 }
30889
30890 emit_insn (gen (bval, rval, mem));
30891 }
30892
30893 /* Mark the previous jump instruction as unlikely. */
30894
30895 static void
30896 emit_unlikely_jump (rtx insn)
30897 {
30898 rtx_insn *jump = emit_jump_insn (insn);
30899 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30900 }
30901
30902 /* Expand a compare and swap pattern. */
30903
30904 void
30905 arm_expand_compare_and_swap (rtx operands[])
30906 {
30907 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30908 machine_mode mode, cmp_mode;
30909
30910 bval = operands[0];
30911 rval = operands[1];
30912 mem = operands[2];
30913 oldval = operands[3];
30914 newval = operands[4];
30915 is_weak = operands[5];
30916 mod_s = operands[6];
30917 mod_f = operands[7];
30918 mode = GET_MODE (mem);
30919
30920 /* Normally the succ memory model must be stronger than fail, but in the
30921 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30922 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30923
30924 if (TARGET_HAVE_LDACQ
30925 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30926 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30927 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30928
30929 switch (mode)
30930 {
30931 case E_QImode:
30932 case E_HImode:
30933 /* For narrow modes, we're going to perform the comparison in SImode,
30934 so do the zero-extension now. */
30935 rval = gen_reg_rtx (SImode);
30936 oldval = convert_modes (SImode, mode, oldval, true);
30937 /* FALLTHRU */
30938
30939 case E_SImode:
30940 /* Force the value into a register if needed. We waited until after
30941 the zero-extension above to do this properly. */
30942 if (!arm_add_operand (oldval, SImode))
30943 oldval = force_reg (SImode, oldval);
30944 break;
30945
30946 case E_DImode:
30947 if (!cmpdi_operand (oldval, mode))
30948 oldval = force_reg (mode, oldval);
30949 break;
30950
30951 default:
30952 gcc_unreachable ();
30953 }
30954
30955 if (TARGET_THUMB1)
30956 cmp_mode = E_SImode;
30957 else
30958 cmp_mode = CC_Zmode;
30959
30960 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30961 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30962 oldval, newval, is_weak, mod_s, mod_f));
30963
30964 if (mode == QImode || mode == HImode)
30965 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30966
30967 /* In all cases, we arrange for success to be signaled by Z set.
30968 This arrangement allows for the boolean result to be used directly
30969 in a subsequent branch, post optimization. For Thumb-1 targets, the
30970 boolean negation of the result is also stored in bval because the Thumb-1
30971 backend lacks dependency tracking for the CC flag, as flag-setting is not
30972 represented at the RTL level. */
30973 if (TARGET_THUMB1)
30974 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30975 else
30976 {
30977 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30978 emit_insn (gen_rtx_SET (bval, x));
30979 }
30980 }
30981
30982 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30983 another memory store between the load-exclusive and store-exclusive can
30984 reset the monitor from Exclusive to Open state. This means we must wait
30985 until after reload to split the pattern, lest we get a register spill in
30986 the middle of the atomic sequence. Success of the compare and swap is
30987 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
30988 for Thumb-1 targets (i.e. the negation of the boolean value returned by
30989 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
30990
30991 void
30992 arm_split_compare_and_swap (rtx operands[])
30993 {
30994 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30995 machine_mode mode;
30996 enum memmodel mod_s, mod_f;
30997 bool is_weak;
30998 rtx_code_label *label1, *label2;
30999 rtx x, cond;
31000
31001 rval = operands[1];
31002 mem = operands[2];
31003 oldval = operands[3];
31004 newval = operands[4];
31005 is_weak = (operands[5] != const0_rtx);
31006 mod_s_rtx = operands[6];
31007 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31008 mod_f = memmodel_from_int (INTVAL (operands[7]));
31009 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31010 mode = GET_MODE (mem);
31011
31012 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31013
31014 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31015 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31016
31017 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31018 a full barrier is emitted after the store-release. */
31019 if (is_armv8_sync)
31020 use_acquire = false;
31021
31022 /* Checks whether a barrier is needed and emits one accordingly. */
31023 if (!(use_acquire || use_release))
31024 arm_pre_atomic_barrier (mod_s);
31025
31026 label1 = NULL;
31027 if (!is_weak)
31028 {
31029 label1 = gen_label_rtx ();
31030 emit_label (label1);
31031 }
31032 label2 = gen_label_rtx ();
31033
31034 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31035
31036 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
31037 as required to communicate with arm_expand_compare_and_swap. */
31038 if (TARGET_32BIT)
31039 {
31040 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31041 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31042 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31043 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31044 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31045 }
31046 else
31047 {
31048 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31049 if (thumb1_cmpneg_operand (oldval, SImode))
31050 {
31051 rtx src = rval;
31052 if (!satisfies_constraint_L (oldval))
31053 {
31054 gcc_assert (satisfies_constraint_J (oldval));
31055
31056 /* For such immediates, ADDS needs the source and destination regs
31057 to be the same.
31058
31059 Normally this would be handled by RA, but this is all happening
31060 after RA. */
31061 emit_move_insn (neg_bval, rval);
31062 src = neg_bval;
31063 }
31064
31065 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31066 label2, cond));
31067 }
31068 else
31069 {
31070 emit_move_insn (neg_bval, const1_rtx);
31071 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31072 }
31073 }
31074
31075 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31076
31077 /* Weak or strong, we want EQ to be true for success, so that we
31078 match the flags that we got from the compare above. */
31079 if (TARGET_32BIT)
31080 {
31081 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31082 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31083 emit_insn (gen_rtx_SET (cond, x));
31084 }
31085
31086 if (!is_weak)
31087 {
31088 /* Z is set to boolean value of !neg_bval, as required to communicate
31089 with arm_expand_compare_and_swap. */
31090 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31091 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31092 }
31093
31094 if (!is_mm_relaxed (mod_f))
31095 emit_label (label2);
31096
31097 /* Checks whether a barrier is needed and emits one accordingly. */
31098 if (is_armv8_sync
31099 || !(use_acquire || use_release))
31100 arm_post_atomic_barrier (mod_s);
31101
31102 if (is_mm_relaxed (mod_f))
31103 emit_label (label2);
31104 }
31105
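/* For reference, a sketch of the code emitted for a strong SImode compare
   and swap on a 32-bit target without acquire/release instructions
   (registers and labels are illustrative; barriers depend on the memory
   model):

	.Lretry:
		ldrex	r0, [r2]	@ rval = *mem
		cmp	r0, r1		@ rval == oldval?
		bne	.Ldone
		strex	r3, r4, [r2]	@ try to store newval
		cmp	r3, #0
		bne	.Lretry		@ retry if the store-exclusive failed
	.Ldone:  */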
31106 /* Split an atomic operation pattern. Operation is given by CODE and is one
31107 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31108 operation). Operation is performed on the content at MEM and on VALUE
31109 following the memory model MODEL_RTX. The content at MEM before and after
31110 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31111 success of the operation is returned in COND. Using a scratch register or
31112 an operand register for these determines what result is returned for that
31113 pattern. */
31114
31115 void
31116 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31117 rtx value, rtx model_rtx, rtx cond)
31118 {
31119 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31120 machine_mode mode = GET_MODE (mem);
31121 machine_mode wmode = (mode == DImode ? DImode : SImode);
31122 rtx_code_label *label;
31123 bool all_low_regs, bind_old_new;
31124 rtx x;
31125
31126 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31127
31128 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31129 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31130
31131 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31132 a full barrier is emitted after the store-release. */
31133 if (is_armv8_sync)
31134 use_acquire = false;
31135
31136 /* Checks whether a barrier is needed and emits one accordingly. */
31137 if (!(use_acquire || use_release))
31138 arm_pre_atomic_barrier (model);
31139
31140 label = gen_label_rtx ();
31141 emit_label (label);
31142
31143 if (new_out)
31144 new_out = gen_lowpart (wmode, new_out);
31145 if (old_out)
31146 old_out = gen_lowpart (wmode, old_out);
31147 else
31148 old_out = new_out;
31149 value = simplify_gen_subreg (wmode, value, mode, 0);
31150
31151 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31152
31153 /* Does the operation require destination and first operand to use the same
31154 register? This is decided by register constraints of relevant insn
31155 patterns in thumb1.md. */
31156 gcc_assert (!new_out || REG_P (new_out));
31157 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31158 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31159 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31160 bind_old_new =
31161 (TARGET_THUMB1
31162 && code != SET
31163 && code != MINUS
31164 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31165
31166 /* We want to return the old value while putting the result of the operation
31167 in the same register as the old value, so copy the old value over to the
31168 destination register and use that register for the operation. */
31169 if (old_out && bind_old_new)
31170 {
31171 emit_move_insn (new_out, old_out);
31172 old_out = new_out;
31173 }
31174
31175 switch (code)
31176 {
31177 case SET:
31178 new_out = value;
31179 break;
31180
31181 case NOT:
31182 x = gen_rtx_AND (wmode, old_out, value);
31183 emit_insn (gen_rtx_SET (new_out, x));
31184 x = gen_rtx_NOT (wmode, new_out);
31185 emit_insn (gen_rtx_SET (new_out, x));
31186 break;
31187
31188 case MINUS:
31189 if (CONST_INT_P (value))
31190 {
31191 value = gen_int_mode (-INTVAL (value), wmode);
31192 code = PLUS;
31193 }
31194 /* FALLTHRU */
31195
31196 case PLUS:
31197 if (mode == DImode)
31198 {
31199 /* DImode plus/minus need to clobber flags. */
31200 /* The adddi3 and subdi3 patterns are incorrectly written so that
31201 they require matching operands, even when we could easily support
31202 three operands. Thankfully, this can be fixed up post-splitting,
31203 as the individual add+adc patterns do accept three operands and
31204 post-reload cprop can make these moves go away. */
31205 emit_move_insn (new_out, old_out);
31206 if (code == PLUS)
31207 x = gen_adddi3 (new_out, new_out, value);
31208 else
31209 x = gen_subdi3 (new_out, new_out, value);
31210 emit_insn (x);
31211 break;
31212 }
31213 /* FALLTHRU */
31214
31215 default:
31216 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31217 emit_insn (gen_rtx_SET (new_out, x));
31218 break;
31219 }
31220
31221 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31222 use_release);
31223
31224 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31225 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31226
31227 /* Checks whether a barrier is needed and emits one accordingly. */
31228 if (is_armv8_sync
31229 || !(use_acquire || use_release))
31230 arm_post_atomic_barrier (model);
31231 }
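/* For reference, a sketch of the loop emitted for an SImode atomic add
   (registers and labels are illustrative; barriers depend on MODEL_RTX):

	.Lretry:
		ldrex	r0, [r3]	@ old_out = *mem
		add	r1, r0, r2	@ new_out = old_out + value
		strex	ip, r1, [r3]	@ cond = store-exclusive status
		cmp	ip, #0
		bne	.Lretry  */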
31232 \f
31233 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31234 opt_machine_mode
31235 arm_mode_to_pred_mode (machine_mode mode)
31236 {
31237 switch (GET_MODE_NUNITS (mode))
31238 {
31239 case 16: return V16BImode;
31240 case 8: return V8BImode;
31241 case 4: return V4BImode;
31242 }
31243 return opt_machine_mode ();
31244 }
31245
31246 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31247 If CAN_INVERT, store either the result or its inverse in TARGET
31248 and return true if TARGET contains the inverse. If !CAN_INVERT,
31249 always store the result in TARGET, never its inverse.
31250
31251 Note that the handling of floating-point comparisons is not
31252 IEEE compliant. */
31253
31254 bool
31255 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31256 bool can_invert)
31257 {
31258 machine_mode cmp_result_mode = GET_MODE (target);
31259 machine_mode cmp_mode = GET_MODE (op0);
31260
31261 bool inverted;
31262
31263 /* MVE supports more comparisons than Neon. */
31264 if (TARGET_HAVE_MVE)
31265 inverted = false;
31266 else
31267 switch (code)
31268 {
31269 /* For these we need to compute the inverse of the requested
31270 comparison. */
31271 case UNORDERED:
31272 case UNLT:
31273 case UNLE:
31274 case UNGT:
31275 case UNGE:
31276 case UNEQ:
31277 case NE:
31278 code = reverse_condition_maybe_unordered (code);
31279 if (!can_invert)
31280 {
31281 /* Recursively emit the inverted comparison into a temporary
31282 and then store its inverse in TARGET. This avoids reusing
31283 TARGET (which for integer NE could be one of the inputs). */
31284 rtx tmp = gen_reg_rtx (cmp_result_mode);
31285 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31286 gcc_unreachable ();
31287 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31288 return false;
31289 }
31290 inverted = true;
31291 break;
31292
31293 default:
31294 inverted = false;
31295 break;
31296 }
31297
31298 switch (code)
31299 {
31300 /* These are natively supported by Neon for zero comparisons, but otherwise
31301 require the operands to be swapped. For MVE, we can only compare
31302 registers. */
31303 case LE:
31304 case LT:
31305 if (!TARGET_HAVE_MVE)
31306 if (op1 != CONST0_RTX (cmp_mode))
31307 {
31308 code = swap_condition (code);
31309 std::swap (op0, op1);
31310 }
31311 /* Fall through. */
31312
31313 /* These are natively supported by Neon for both register and zero
31314 operands. MVE supports registers only. */
31315 case EQ:
31316 case GE:
31317 case GT:
31318 case NE:
31319 if (TARGET_HAVE_MVE)
31320 {
31321 switch (GET_MODE_CLASS (cmp_mode))
31322 {
31323 case MODE_VECTOR_INT:
31324 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31325 op0, force_reg (cmp_mode, op1)));
31326 break;
31327 case MODE_VECTOR_FLOAT:
31328 if (TARGET_HAVE_MVE_FLOAT)
31329 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31330 op0, force_reg (cmp_mode, op1)));
31331 else
31332 gcc_unreachable ();
31333 break;
31334 default:
31335 gcc_unreachable ();
31336 }
31337 }
31338 else
31339 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31340 return inverted;
31341
31342 /* These are natively supported for register operands only.
31343 Comparisons with zero aren't useful and should be folded
31344 or canonicalized by target-independent code. */
31345 case GEU:
31346 case GTU:
31347 if (TARGET_HAVE_MVE)
31348 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31349 op0, force_reg (cmp_mode, op1)));
31350 else
31351 emit_insn (gen_neon_vc (code, cmp_mode, target,
31352 op0, force_reg (cmp_mode, op1)));
31353 return inverted;
31354
31355 /* These require the operands to be swapped and likewise do not
31356 support comparisons with zero. */
31357 case LEU:
31358 case LTU:
31359 if (TARGET_HAVE_MVE)
31360 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31361 force_reg (cmp_mode, op1), op0));
31362 else
31363 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31364 target, force_reg (cmp_mode, op1), op0));
31365 return inverted;
31366
31367 /* These need a combination of two comparisons. */
31368 case LTGT:
31369 case ORDERED:
31370 {
31371 /* Operands are LTGT iff (a > b || a < b).
31372 Operands are ORDERED iff (a > b || a <= b). */
31373 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31374 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31375 rtx_code alt_code = (code == LTGT ? LT : LE);
31376 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31377 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31378 gcc_unreachable ();
31379 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31380 gt_res, alt_res)));
31381 return inverted;
31382 }
31383
31384 default:
31385 gcc_unreachable ();
31386 }
31387 }
31388
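/* For example (illustrative only): a V4SF UNLT comparison on Neon is
   rewritten as its inverse GE; with !CAN_INVERT this emits a vcge.f32
   into a temporary followed by a vmvn into TARGET, while with CAN_INVERT
   only the vcge.f32 is emitted and TRUE is returned to signal that TARGET
   holds the inverse.  */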
31389 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31390 CMP_RESULT_MODE is the mode of the comparison result. */
31391
31392 void
31393 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31394 {
31395 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31396 arm_expand_vector_compare, and another one here. */
31397 rtx mask;
31398
31399 if (TARGET_HAVE_MVE)
31400 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31401 else
31402 mask = gen_reg_rtx (cmp_result_mode);
31403
31404 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31405 operands[4], operands[5], true);
31406 if (inverted)
31407 std::swap (operands[1], operands[2]);
31408 if (TARGET_NEON)
31409 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31410 mask, operands[1], operands[2]));
31411 else
31412 {
31413 machine_mode cmp_mode = GET_MODE (operands[0]);
31414
31415 switch (GET_MODE_CLASS (cmp_mode))
31416 {
31417 case MODE_VECTOR_INT:
31418 emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
31419 operands[1], operands[2], mask));
31420 break;
31421 case MODE_VECTOR_FLOAT:
31422 if (TARGET_HAVE_MVE_FLOAT)
31423 emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
31424 operands[1], operands[2], mask));
31425 else
31426 gcc_unreachable ();
31427 break;
31428 default:
31429 gcc_unreachable ();
31430 }
31431 }
31432 }
31433 \f
31434 #define MAX_VECT_LEN 16
31435
31436 struct expand_vec_perm_d
31437 {
31438 rtx target, op0, op1;
31439 vec_perm_indices perm;
31440 machine_mode vmode;
31441 bool one_vector_p;
31442 bool testing_p;
31443 };
31444
31445 /* Generate a variable permutation. */
31446
31447 static void
31448 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31449 {
31450 machine_mode vmode = GET_MODE (target);
31451 bool one_vector_p = rtx_equal_p (op0, op1);
31452
31453 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31454 gcc_checking_assert (GET_MODE (op0) == vmode);
31455 gcc_checking_assert (GET_MODE (op1) == vmode);
31456 gcc_checking_assert (GET_MODE (sel) == vmode);
31457 gcc_checking_assert (TARGET_NEON);
31458
31459 if (one_vector_p)
31460 {
31461 if (vmode == V8QImode)
31462 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31463 else
31464 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31465 }
31466 else
31467 {
31468 rtx pair;
31469
31470 if (vmode == V8QImode)
31471 {
31472 pair = gen_reg_rtx (V16QImode);
31473 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31474 pair = gen_lowpart (TImode, pair);
31475 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31476 }
31477 else
31478 {
31479 pair = gen_reg_rtx (OImode);
31480 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31481 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31482 }
31483 }
31484 }
31485
31486 void
31487 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31488 {
31489 machine_mode vmode = GET_MODE (target);
31490 unsigned int nelt = GET_MODE_NUNITS (vmode);
31491 bool one_vector_p = rtx_equal_p (op0, op1);
31492 rtx mask;
31493
31494 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31495 numbering of elements for big-endian, we must reverse the order. */
31496 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31497
31498 /* The VTBL instruction does not use a modulo index, so we must take care
31499 of that ourselves. */
31500 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31501 mask = gen_const_vec_duplicate (vmode, mask);
31502 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31503
31504 arm_expand_vec_perm_1 (target, op0, op1, sel);
31505 }
31506
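/* For example (illustrative only): for a two-input V8QImode permutation
   the mask is 2 * 8 - 1 = 15, so each selector byte is ANDed with 15
   before being handed to the VTBL sequence.  */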
31507 /* Map lane ordering between architectural lane order, and GCC lane order,
31508 taking into account ABI. See comment above output_move_neon for details. */
31509
31510 static int
31511 neon_endian_lane_map (machine_mode mode, int lane)
31512 {
31513 if (BYTES_BIG_ENDIAN)
31514 {
31515 int nelems = GET_MODE_NUNITS (mode);
31516 /* Reverse lane order. */
31517 lane = (nelems - 1 - lane);
31518 /* Reverse D register order, to match ABI. */
31519 if (GET_MODE_SIZE (mode) == 16)
31520 lane = lane ^ (nelems / 2);
31521 }
31522 return lane;
31523 }
31524
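/* As an illustration (little-endian is the identity map): for the
   Q-register mode V4SImode on a big-endian target, lanes 0, 1, 2, 3 map
   to 1, 0, 3, 2, while for the D-register mode V2SImode lanes 0, 1 simply
   map to 1, 0.  */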
31525 /* Some permutations index into pairs of vectors, this is a helper function
31526 to map indexes into those pairs of vectors. */
31527
31528 static int
31529 neon_pair_endian_lane_map (machine_mode mode, int lane)
31530 {
31531 int nelem = GET_MODE_NUNITS (mode);
31532 if (BYTES_BIG_ENDIAN)
31533 lane =
31534 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31535 return lane;
31536 }
31537
31538 /* Generate or test for an insn that supports a constant permutation. */
31539
31540 /* Recognize patterns for the VUZP insns. */
31541
31542 static bool
31543 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31544 {
31545 unsigned int i, odd, mask, nelt = d->perm.length ();
31546 rtx out0, out1, in0, in1;
31547 int first_elem;
31548 int swap_nelt;
31549
31550 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31551 return false;
31552
31553 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31554 big-endian pattern on 64-bit vectors, so we correct for that. */
31555 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31556 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31557
31558 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31559
31560 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31561 odd = 0;
31562 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31563 odd = 1;
31564 else
31565 return false;
31566 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31567
31568 for (i = 0; i < nelt; i++)
31569 {
31570 unsigned elt =
31571 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31572 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31573 return false;
31574 }
31575
31576 /* Success! */
31577 if (d->testing_p)
31578 return true;
31579
31580 in0 = d->op0;
31581 in1 = d->op1;
31582 if (swap_nelt != 0)
31583 std::swap (in0, in1);
31584
31585 out0 = d->target;
31586 out1 = gen_reg_rtx (d->vmode);
31587 if (odd)
31588 std::swap (out0, out1);
31589
31590 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31591 return true;
31592 }
31593
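/* For example (little-endian, illustrative only): the two-input V8QImode
   permutation { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even lanes and is
   matched here with ODD == 0; the odd-lane selection { 1, 3, ..., 15 } is
   matched with ODD == 1.  */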
31594 /* Recognize patterns for the VZIP insns. */
31595
31596 static bool
31597 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31598 {
31599 unsigned int i, high, mask, nelt = d->perm.length ();
31600 rtx out0, out1, in0, in1;
31601 int first_elem;
31602 bool is_swapped;
31603
31604 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31605 return false;
31606
31607 is_swapped = BYTES_BIG_ENDIAN;
31608
31609 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31610
31611 high = nelt / 2;
31612 if (first_elem == neon_endian_lane_map (d->vmode, high))
31613 ;
31614 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31615 high = 0;
31616 else
31617 return false;
31618 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31619
31620 for (i = 0; i < nelt / 2; i++)
31621 {
31622 unsigned elt =
31623 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31624 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31625 != elt)
31626 return false;
31627 elt =
31628 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31629 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31630 != elt)
31631 return false;
31632 }
31633
31634 /* Success! */
31635 if (d->testing_p)
31636 return true;
31637
31638 in0 = d->op0;
31639 in1 = d->op1;
31640 if (is_swapped)
31641 std::swap (in0, in1);
31642
31643 out0 = d->target;
31644 out1 = gen_reg_rtx (d->vmode);
31645 if (high)
31646 std::swap (out0, out1);
31647
31648 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31649 return true;
31650 }
31651
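/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 0, 4, 1, 5 } interleaves the low halves of the inputs and
   is matched here with HIGH == 0, while { 2, 6, 3, 7 } interleaves the
   high halves and is matched with HIGH == 2.  */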
31652 /* Recognize patterns for the VREV insns. */
31653 static bool
31654 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31655 {
31656 unsigned int i, j, diff, nelt = d->perm.length ();
31657 rtx (*gen) (machine_mode, rtx, rtx);
31658
31659 if (!d->one_vector_p)
31660 return false;
31661
31662 diff = d->perm[0];
31663 switch (diff)
31664 {
31665 case 7:
31666 switch (d->vmode)
31667 {
31668 case E_V16QImode:
31669 case E_V8QImode:
31670 gen = gen_neon_vrev64;
31671 break;
31672 default:
31673 return false;
31674 }
31675 break;
31676 case 3:
31677 switch (d->vmode)
31678 {
31679 case E_V16QImode:
31680 case E_V8QImode:
31681 gen = gen_neon_vrev32;
31682 break;
31683 case E_V8HImode:
31684 case E_V4HImode:
31685 case E_V8HFmode:
31686 case E_V4HFmode:
31687 gen = gen_neon_vrev64;
31688 break;
31689 default:
31690 return false;
31691 }
31692 break;
31693 case 1:
31694 switch (d->vmode)
31695 {
31696 case E_V16QImode:
31697 case E_V8QImode:
31698 gen = gen_neon_vrev16;
31699 break;
31700 case E_V8HImode:
31701 case E_V4HImode:
31702 gen = gen_neon_vrev32;
31703 break;
31704 case E_V4SImode:
31705 case E_V2SImode:
31706 case E_V4SFmode:
31707 case E_V2SFmode:
31708 gen = gen_neon_vrev64;
31709 break;
31710 default:
31711 return false;
31712 }
31713 break;
31714 default:
31715 return false;
31716 }
31717
31718 for (i = 0; i < nelt ; i += diff + 1)
31719 for (j = 0; j <= diff; j += 1)
31720 {
31721 /* This is guaranteed to be true as the value of diff
31722 is 7, 3, 1 and we should have enough elements in the
31723 queue to generate this. Getting a vector mask with a
31724 value of diff other than these values implies that
31725 something is wrong by the time we get here. */
31726 gcc_assert (i + j < nelt);
31727 if (d->perm[i + j] != i + diff - j)
31728 return false;
31729 }
31730
31731 /* Success! */
31732 if (d->testing_p)
31733 return true;
31734
31735 emit_insn (gen (d->vmode, d->target, d->op0));
31736 return true;
31737 }
31738
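/* For example (little-endian, illustrative only): the single-input
   V8QImode permutation { 3, 2, 1, 0, 7, 6, 5, 4 } has DIFF == 3 and
   reverses the bytes within each 32-bit group, so it is emitted as
   vrev32.8.  */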
31739 /* Recognize patterns for the VTRN insns. */
31740
31741 static bool
31742 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31743 {
31744 unsigned int i, odd, mask, nelt = d->perm.length ();
31745 rtx out0, out1, in0, in1;
31746
31747 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31748 return false;
31749
31750 /* Note that these are little-endian tests. Adjust for big-endian later. */
31751 if (d->perm[0] == 0)
31752 odd = 0;
31753 else if (d->perm[0] == 1)
31754 odd = 1;
31755 else
31756 return false;
31757 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31758
31759 for (i = 0; i < nelt; i += 2)
31760 {
31761 if (d->perm[i] != i + odd)
31762 return false;
31763 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31764 return false;
31765 }
31766
31767 /* Success! */
31768 if (d->testing_p)
31769 return true;
31770
31771 in0 = d->op0;
31772 in1 = d->op1;
31773 if (BYTES_BIG_ENDIAN)
31774 {
31775 std::swap (in0, in1);
31776 odd = !odd;
31777 }
31778
31779 out0 = d->target;
31780 out1 = gen_reg_rtx (d->vmode);
31781 if (odd)
31782 std::swap (out0, out1);
31783
31784 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31785 return true;
31786 }
31787
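/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 0, 4, 2, 6 } is matched here with ODD == 0 and
   { 1, 5, 3, 7 } with ODD == 1, corresponding to the two outputs of
   vtrn.32.  */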
31788 /* Recognize patterns for the VEXT insns. */
31789
31790 static bool
31791 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31792 {
31793 unsigned int i, nelt = d->perm.length ();
31794 rtx offset;
31795
31796 unsigned int location;
31797
31798 unsigned int next = d->perm[0] + 1;
31799
31800 /* TODO: Handle GCC's numbering of elements for big-endian. */
31801 if (BYTES_BIG_ENDIAN)
31802 return false;
31803
31804 /* Check if the extracted indexes are increasing by one. */
31805 for (i = 1; i < nelt; next++, i++)
31806 {
31807 /* If we hit the most significant element of the 2nd vector in
31808 the previous iteration, no need to test further. */
31809 if (next == 2 * nelt)
31810 return false;
31811
31812 /* If we are operating on only one vector: it could be a
31813 rotation. If there are only two elements of size < 64, let
31814 arm_evpc_neon_vrev catch it. */
31815 if (d->one_vector_p && (next == nelt))
31816 {
31817 if ((nelt == 2) && (d->vmode != V2DImode))
31818 return false;
31819 else
31820 next = 0;
31821 }
31822
31823 if (d->perm[i] != next)
31824 return false;
31825 }
31826
31827 location = d->perm[0];
31828
31829 /* Success! */
31830 if (d->testing_p)
31831 return true;
31832
31833 offset = GEN_INT (location);
31834
31835 if (d->vmode == E_DImode)
31836 return false;
31837
31838 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31839 return true;
31840 }
31841
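/* For example (little-endian, illustrative only): the two-input V4SImode
   permutation { 1, 2, 3, 4 } extracts four consecutive elements starting
   at index 1, so it is emitted as a vext with offset #1; the single-input
   rotation { 1, 2, 3, 0 } is handled the same way.  */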
31842 /* The NEON VTBL instruction is a fully variable permutation that's even
31843 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31844 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31845 can do slightly better by expanding this as a constant where we don't
31846 have to apply a mask. */
31847
31848 static bool
31849 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31850 {
31851 rtx rperm[MAX_VECT_LEN], sel;
31852 machine_mode vmode = d->vmode;
31853 unsigned int i, nelt = d->perm.length ();
31854
31855 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31856 numbering of elements for big-endian, we must reverse the order. */
31857 if (BYTES_BIG_ENDIAN)
31858 return false;
31859
31860 if (d->testing_p)
31861 return true;
31862
31863 /* Generic code will try constant permutation twice. Once with the
31864 original mode and again with the elements lowered to QImode.
31865 So wait and don't do the selector expansion ourselves. */
31866 if (vmode != V8QImode && vmode != V16QImode)
31867 return false;
31868
31869 for (i = 0; i < nelt; ++i)
31870 rperm[i] = GEN_INT (d->perm[i]);
31871 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31872 sel = force_reg (vmode, sel);
31873
31874 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31875 return true;
31876 }
31877
31878 static bool
31879 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31880 {
31881 /* Check if the input mask matches vext before reordering the
31882 operands. */
31883 if (TARGET_NEON)
31884 if (arm_evpc_neon_vext (d))
31885 return true;
31886
31887 /* The pattern matching functions above are written to look for a small
31888 number to begin the sequence (0, 1, N/2). If we begin with an index
31889 from the second operand, we can swap the operands. */
31890 unsigned int nelt = d->perm.length ();
31891 if (d->perm[0] >= nelt)
31892 {
31893 d->perm.rotate_inputs (1);
31894 std::swap (d->op0, d->op1);
31895 }
31896
31897 if (TARGET_NEON)
31898 {
31899 if (arm_evpc_neon_vuzp (d))
31900 return true;
31901 if (arm_evpc_neon_vzip (d))
31902 return true;
31903 if (arm_evpc_neon_vrev (d))
31904 return true;
31905 if (arm_evpc_neon_vtrn (d))
31906 return true;
31907 return arm_evpc_neon_vtbl (d);
31908 }
31909 return false;
31910 }
31911
31912 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31913
31914 static bool
31915 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
31916 rtx target, rtx op0, rtx op1,
31917 const vec_perm_indices &sel)
31918 {
31919 if (vmode != op_mode)
31920 return false;
31921
31922 struct expand_vec_perm_d d;
31923 int i, nelt, which;
31924
31925 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31926 return false;
31927
31928 d.target = target;
31929 if (op0)
31930 {
31931 rtx nop0 = force_reg (vmode, op0);
31932 if (op0 == op1)
31933 op1 = nop0;
31934 op0 = nop0;
31935 }
31936 if (op1)
31937 op1 = force_reg (vmode, op1);
31938 d.op0 = op0;
31939 d.op1 = op1;
31940
31941 d.vmode = vmode;
31942 gcc_assert (VECTOR_MODE_P (d.vmode));
31943 d.testing_p = !target;
31944
31945 nelt = GET_MODE_NUNITS (d.vmode);
31946 for (i = which = 0; i < nelt; ++i)
31947 {
31948 int ei = sel[i] & (2 * nelt - 1);
31949 which |= (ei < nelt ? 1 : 2);
31950 }
31951
31952 switch (which)
31953 {
31954 default:
31955 gcc_unreachable();
31956
31957 case 3:
31958 d.one_vector_p = false;
31959 if (d.testing_p || !rtx_equal_p (op0, op1))
31960 break;
31961
31962 /* The elements of PERM do not suggest that only the first operand
31963 is used, but both operands are identical. Allow easier matching
31964 of the permutation by folding the permutation into the single
31965 input vector. */
31966 /* FALLTHRU */
31967 case 2:
31968 d.op0 = op1;
31969 d.one_vector_p = true;
31970 break;
31971
31972 case 1:
31973 d.op1 = op0;
31974 d.one_vector_p = true;
31975 break;
31976 }
31977
31978 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31979
31980 if (!d.testing_p)
31981 return arm_expand_vec_perm_const_1 (&d);
31982
31983 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31984 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31985 if (!d.one_vector_p)
31986 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31987
31988 start_sequence ();
31989 bool ret = arm_expand_vec_perm_const_1 (&d);
31990 end_sequence ();
31991
31992 return ret;
31993 }
31994
31995 bool
31996 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31997 {
31998 /* If we are soft float and we either have ldrd or the access fits in a
31999 single word, then all auto increment forms are ok. */
32000 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32001 return true;
32002
32003 switch (code)
32004 {
32005 /* Post increment and Pre Decrement are supported for all
32006 instruction forms except for vector forms. */
32007 case ARM_POST_INC:
32008 case ARM_PRE_DEC:
32009 if (VECTOR_MODE_P (mode))
32010 {
32011 if (code != ARM_PRE_DEC)
32012 return true;
32013 else
32014 return false;
32015 }
32016
32017 return true;
32018
32019 case ARM_POST_DEC:
32020 case ARM_PRE_INC:
32021 /* Without LDRD and mode size greater than
32022 word size, there is no point in auto-incrementing
32023 because ldm and stm will not have these forms. */
32024 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32025 return false;
32026
32027 /* Vector and floating point modes do not support
32028 these auto increment forms. */
32029 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32030 return false;
32031
32032 return true;
32033
32034 default:
32035 return false;
32036
32037 }
32038
32039 return false;
32040 }
32041
32042 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32043 on ARM, since we know that shifts by negative amounts are no-ops.
32044 Additionally, the default expansion code is not available or suitable
32045 for post-reload insn splits (this can occur when the register allocator
32046 chooses not to do a shift in NEON).
32047
32048 This function is used in both initial expand and post-reload splits, and
32049 handles all kinds of 64-bit shifts.
32050
32051 Input requirements:
32052 - It is safe for the input and output to be the same register, but
32053 early-clobber rules apply for the shift amount and scratch registers.
32054 - Shift by register requires both scratch registers. In all other cases
32055 the scratch registers may be NULL.
32056 - Ashiftrt by a register also clobbers the CC register. */
32057 void
32058 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32059 rtx amount, rtx scratch1, rtx scratch2)
32060 {
32061 rtx out_high = gen_highpart (SImode, out);
32062 rtx out_low = gen_lowpart (SImode, out);
32063 rtx in_high = gen_highpart (SImode, in);
32064 rtx in_low = gen_lowpart (SImode, in);
32065
32066 /* Terminology:
32067 in = the register pair containing the input value.
32068 out = the destination register pair.
32069 up = the high- or low-part of each pair.
32070 down = the opposite part to "up".
32071 In a shift, we can consider bits to shift from "up"-stream to
32072 "down"-stream, so in a left-shift "up" is the low-part and "down"
32073 is the high-part of each register pair. */
32074
32075 rtx out_up = code == ASHIFT ? out_low : out_high;
32076 rtx out_down = code == ASHIFT ? out_high : out_low;
32077 rtx in_up = code == ASHIFT ? in_low : in_high;
32078 rtx in_down = code == ASHIFT ? in_high : in_low;
32079
32080 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32081 gcc_assert (out
32082 && (REG_P (out) || SUBREG_P (out))
32083 && GET_MODE (out) == DImode);
32084 gcc_assert (in
32085 && (REG_P (in) || SUBREG_P (in))
32086 && GET_MODE (in) == DImode);
32087 gcc_assert (amount
32088 && (((REG_P (amount) || SUBREG_P (amount))
32089 && GET_MODE (amount) == SImode)
32090 || CONST_INT_P (amount)));
32091 gcc_assert (scratch1 == NULL
32092 || (GET_CODE (scratch1) == SCRATCH)
32093 || (GET_MODE (scratch1) == SImode
32094 && REG_P (scratch1)));
32095 gcc_assert (scratch2 == NULL
32096 || (GET_CODE (scratch2) == SCRATCH)
32097 || (GET_MODE (scratch2) == SImode
32098 && REG_P (scratch2)));
32099 gcc_assert (!REG_P (out) || !REG_P (amount)
32100 || !HARD_REGISTER_P (out)
32101 || (REGNO (out) != REGNO (amount)
32102 && REGNO (out) + 1 != REGNO (amount)));
32103
32104 /* Macros to make following code more readable. */
32105 #define SUB_32(DEST,SRC) \
32106 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32107 #define RSB_32(DEST,SRC) \
32108 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32109 #define SUB_S_32(DEST,SRC) \
32110 gen_addsi3_compare0 ((DEST), (SRC), \
32111 GEN_INT (-32))
32112 #define SET(DEST,SRC) \
32113 gen_rtx_SET ((DEST), (SRC))
32114 #define SHIFT(CODE,SRC,AMOUNT) \
32115 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32116 #define LSHIFT(CODE,SRC,AMOUNT) \
32117 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32118 SImode, (SRC), (AMOUNT))
32119 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32120 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32121 SImode, (SRC), (AMOUNT))
32122 #define ORR(A,B) \
32123 gen_rtx_IOR (SImode, (A), (B))
32124 #define BRANCH(COND,LABEL) \
32125 gen_arm_cond_branch ((LABEL), \
32126 gen_rtx_ ## COND (CCmode, cc_reg, \
32127 const0_rtx), \
32128 cc_reg)
32129
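/* Worked examples of the constant cases below (illustrative only): for a
   64-bit left shift by 8 the sequence reduces to
	out_high = (in_high << 8) | (in_low >> 24);  out_low = in_low << 8;
   and for a logical right shift by 40 (i.e. >= 32) it reduces to
	out_low = in_high >> 8;  out_high = 0;  */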
32130 /* Shifts by register and shifts by constant are handled separately. */
32131 if (CONST_INT_P (amount))
32132 {
32133 /* We have a shift-by-constant. */
32134
32135 /* First, handle out-of-range shift amounts.
32136 In both cases we try to match the result an ARM instruction in a
32137 shift-by-register would give. This helps reduce execution
32138 differences between optimization levels, but it won't stop other
32139 parts of the compiler doing different things. This is "undefined
32140 behavior", in any case. */
32141 if (INTVAL (amount) <= 0)
32142 emit_insn (gen_movdi (out, in));
32143 else if (INTVAL (amount) >= 64)
32144 {
32145 if (code == ASHIFTRT)
32146 {
32147 rtx const31_rtx = GEN_INT (31);
32148 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32149 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32150 }
32151 else
32152 emit_insn (gen_movdi (out, const0_rtx));
32153 }
32154
32155 /* Now handle valid shifts. */
32156 else if (INTVAL (amount) < 32)
32157 {
32158 /* Shifts by a constant less than 32. */
32159 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32160
32161 /* Clearing the out register in DImode first avoids lots
32162 of spilling and results in less stack usage.
32163 Later this redundant insn is completely removed.
32164 Do that only if "in" and "out" are different registers. */
32165 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32166 emit_insn (SET (out, const0_rtx));
32167 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32168 emit_insn (SET (out_down,
32169 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32170 out_down)));
32171 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32172 }
32173 else
32174 {
32175 /* Shifts by a constant greater than 31. */
32176 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32177
32178 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32179 emit_insn (SET (out, const0_rtx));
32180 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32181 if (code == ASHIFTRT)
32182 emit_insn (gen_ashrsi3 (out_up, in_up,
32183 GEN_INT (31)));
32184 else
32185 emit_insn (SET (out_up, const0_rtx));
32186 }
32187 }
32188 else
32189 {
32190 /* We have a shift-by-register. */
32191 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32192
32193 /* This alternative requires the scratch registers. */
32194 gcc_assert (scratch1 && REG_P (scratch1));
32195 gcc_assert (scratch2 && REG_P (scratch2));
32196
32197 /* We will need the values "amount-32" and "32-amount" later.
32198 Swapping them around now allows the later code to be more general. */
32199 switch (code)
32200 {
32201 case ASHIFT:
32202 emit_insn (SUB_32 (scratch1, amount));
32203 emit_insn (RSB_32 (scratch2, amount));
32204 break;
32205 case ASHIFTRT:
32206 emit_insn (RSB_32 (scratch1, amount));
32207 /* Also set CC = amount > 32. */
32208 emit_insn (SUB_S_32 (scratch2, amount));
32209 break;
32210 case LSHIFTRT:
32211 emit_insn (RSB_32 (scratch1, amount));
32212 emit_insn (SUB_32 (scratch2, amount));
32213 break;
32214 default:
32215 gcc_unreachable ();
32216 }
32217
32218 /* Emit code like this:
32219
32220 arithmetic-left:
32221 out_down = in_down << amount;
32222 out_down = (in_up << (amount - 32)) | out_down;
32223 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32224 out_up = in_up << amount;
32225
32226 arithmetic-right:
32227 out_down = in_down >> amount;
32228 out_down = (in_up << (32 - amount)) | out_down;
32229 if (amount < 32)
32230 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32231 out_up = in_up >> amount;
32232
32233 logical-right:
32234 out_down = in_down >> amount;
32235 out_down = (in_up << (32 - amount)) | out_down;
32236 if (amount < 32)
32237 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32238 out_up = in_up >> amount;
32239
32240 The ARM and Thumb2 variants are the same but implemented slightly
32241 differently. If this were only called during expand we could just
32242 use the Thumb2 case and let combine do the right thing, but this
32243 can also be called from post-reload splitters. */
32244
32245 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32246
32247 if (!TARGET_THUMB2)
32248 {
32249 /* Emit code for ARM mode. */
32250 emit_insn (SET (out_down,
32251 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32252 if (code == ASHIFTRT)
32253 {
32254 rtx_code_label *done_label = gen_label_rtx ();
32255 emit_jump_insn (BRANCH (LT, done_label));
32256 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32257 out_down)));
32258 emit_label (done_label);
32259 }
32260 else
32261 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32262 out_down)));
32263 }
32264 else
32265 {
32266 /* Emit code for Thumb2 mode.
32267 Thumb2 can't do shift and or in one insn. */
32268 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32269 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32270
32271 if (code == ASHIFTRT)
32272 {
32273 rtx_code_label *done_label = gen_label_rtx ();
32274 emit_jump_insn (BRANCH (LT, done_label));
32275 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32276 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32277 emit_label (done_label);
32278 }
32279 else
32280 {
32281 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32282 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32283 }
32284 }
32285
32286 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32287 }
32288
32289 #undef SUB_32
32290 #undef RSB_32
32291 #undef SUB_S_32
32292 #undef SET
32293 #undef SHIFT
32294 #undef LSHIFT
32295 #undef REV_LSHIFT
32296 #undef ORR
32297 #undef BRANCH
32298 }
32299
32300 /* Returns true if the pattern is a valid symbolic address, which is either a
32301 symbol_ref or (symbol_ref + addend).
32302
32303 According to the ARM ELF ABI, the initial addend of REL-type relocations
32304 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32305 literal field of the instruction as a 16-bit signed value in the range
32306 -32768 <= A < 32768.
32307
32308 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32309 unsigned range of 0 <= A < 256 as described in the AAELF32
32310 relocation handling documentation: REL-type relocations are encoded
32311 as unsigned in this case. */
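/* For example, (symbol_ref "sym") and
   (const (plus (symbol_ref "sym") (const_int 256))) are accepted for the
   MOVW/MOVT case, whereas an addend of 0x8000 is rejected; in Thumb-1
   without MOVT only addends in [0, 255] are accepted.  */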
32312
32313 bool
32314 arm_valid_symbolic_address_p (rtx addr)
32315 {
32316 rtx xop0, xop1 = NULL_RTX;
32317 rtx tmp = addr;
32318
32319 if (target_word_relocations)
32320 return false;
32321
32322 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32323 return true;
32324
32325 /* (const (plus: symbol_ref const_int)) */
32326 if (GET_CODE (addr) == CONST)
32327 tmp = XEXP (addr, 0);
32328
32329 if (GET_CODE (tmp) == PLUS)
32330 {
32331 xop0 = XEXP (tmp, 0);
32332 xop1 = XEXP (tmp, 1);
32333
32334 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32335 {
32336 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32337 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32338 else
32339 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32340 }
32341 }
32342
32343 return false;
32344 }
32345
32346 /* Returns true if *COMPARISON is a valid comparison operation, and
32347 puts the operands into a form that is valid. */
32348 bool
32349 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32350 {
32351 enum rtx_code code = GET_CODE (*comparison);
32352 int code_int;
32353 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32354 ? GET_MODE (*op2) : GET_MODE (*op1);
32355
32356 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32357
32358 if (code == UNEQ || code == LTGT)
32359 return false;
32360
32361 code_int = (int)code;
32362 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32363 PUT_CODE (*comparison, (enum rtx_code)code_int);
32364
32365 switch (mode)
32366 {
32367 case E_SImode:
32368 if (!arm_add_operand (*op1, mode))
32369 *op1 = force_reg (mode, *op1);
32370 if (!arm_add_operand (*op2, mode))
32371 *op2 = force_reg (mode, *op2);
32372 return true;
32373
32374 case E_DImode:
32375 /* gen_compare_reg() will sort out any invalid operands. */
32376 return true;
32377
32378 case E_HFmode:
32379 if (!TARGET_VFP_FP16INST)
32380 break;
32381 /* FP16 comparisons are done in SF mode. */
32382 mode = SFmode;
32383 *op1 = convert_to_mode (mode, *op1, 1);
32384 *op2 = convert_to_mode (mode, *op2, 1);
32385 /* Fall through. */
32386 case E_SFmode:
32387 case E_DFmode:
32388 if (!vfp_compare_operand (*op1, mode))
32389 *op1 = force_reg (mode, *op1);
32390 if (!vfp_compare_operand (*op2, mode))
32391 *op2 = force_reg (mode, *op2);
32392 return true;
32393 default:
32394 break;
32395 }
32396
32397 return false;
32398
32399 }
32400
32401 /* Maximum number of instructions to set block of memory. */
32402 static int
32403 arm_block_set_max_insns (void)
32404 {
32405 if (optimize_function_for_size_p (cfun))
32406 return 4;
32407 else
32408 return current_tune->max_insns_inline_memset;
32409 }
32410
32411 /* Return TRUE if it's profitable to set block of memory for
32412 non-vectorized case. VAL is the value to set the memory
32413 with. LENGTH is the number of bytes to set. ALIGN is the
32414 alignment of the destination memory in bytes. UNALIGNED_P
32415 is TRUE if we can only set the memory with instructions
32416 meeting alignment requirements. USE_STRD_P is TRUE if we
32417 can use strd to set the memory. */
32418 static bool
32419 arm_block_set_non_vect_profit_p (rtx val,
32420 unsigned HOST_WIDE_INT length,
32421 unsigned HOST_WIDE_INT align,
32422 bool unaligned_p, bool use_strd_p)
32423 {
32424 int num = 0;
32425 /* For 0-7 leftover bytes, we can set the memory block using
32426 strb/strh/str with the minimum number of instructions. */
32427 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32428
32429 if (unaligned_p)
32430 {
32431 num = arm_const_inline_cost (SET, val);
32432 num += length / align + length % align;
32433 }
32434 else if (use_strd_p)
32435 {
32436 num = arm_const_double_inline_cost (val);
32437 num += (length >> 3) + leftover[length & 7];
32438 }
32439 else
32440 {
32441 num = arm_const_inline_cost (SET, val);
32442 num += (length >> 2) + leftover[length & 3];
32443 }
32444
32445 /* We may be able to combine last pair STRH/STRB into a single STR
32446 by shifting one byte back. */
32447 if (unaligned_access && length > 3 && (length & 3) == 3)
32448 num--;
32449
32450 return (num <= arm_block_set_max_insns ());
32451 }
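/* Worked example: for LENGTH == 15 with word alignment and no strd, the
   stores amount to (15 >> 2) + leftover[15 & 3] == 3 + 2 == 5 insns plus
   the cost of loading VAL; with unaligned access available the trailing
   STRH/STRB pair is merged into a single STR, saving one more insn.  */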
32452
32453 /* Return TRUE if it's profitable to set block of memory for
32454 vectorized case. LENGTH is the number of bytes to set.
32455 ALIGN is the alignment of destination memory in bytes.
32456 MODE is the vector mode used to set the memory. */
32457 static bool
32458 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32459 unsigned HOST_WIDE_INT align,
32460 machine_mode mode)
32461 {
32462 int num;
32463 bool unaligned_p = ((align & 3) != 0);
32464 unsigned int nelt = GET_MODE_NUNITS (mode);
32465
32466 /* Instruction loading constant value. */
32467 num = 1;
32468 /* Instructions storing the memory. */
32469 num += (length + nelt - 1) / nelt;
32470 /* Instructions adjusting the address expression. We only need to
32471 adjust the address expression if it's 4-byte aligned and the
32472 leftover bytes can only be stored by a misaligned store instruction. */
32473 if (!unaligned_p && (length & 3) != 0)
32474 num++;
32475
32476 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32477 if (!unaligned_p && mode == V16QImode)
32478 num--;
32479
32480 return (num <= arm_block_set_max_insns ());
32481 }
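/* Worked example: for LENGTH == 20, a word-aligned destination and
   MODE == V16QImode, we count one insn to load the constant plus two
   vector stores for ceil(20/16) chunks, no address adjustment since
   (20 & 3) == 0, and one insn discounted for the first aligned
   vst1:v16qi, giving num == 2.  */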
32482
32483 /* Set a block of memory using vectorization instructions for the
32484 unaligned case. We fill the first LENGTH bytes of the memory
32485 area starting from DSTBASE with byte constant VALUE. ALIGN is
32486 the alignment requirement of memory. Return TRUE if succeeded. */
32487 static bool
32488 arm_block_set_unaligned_vect (rtx dstbase,
32489 unsigned HOST_WIDE_INT length,
32490 unsigned HOST_WIDE_INT value,
32491 unsigned HOST_WIDE_INT align)
32492 {
32493 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32494 rtx dst, mem;
32495 rtx val_vec, reg;
32496 rtx (*gen_func) (rtx, rtx);
32497 machine_mode mode;
32498 unsigned HOST_WIDE_INT v = value;
32499 unsigned int offset = 0;
32500 gcc_assert ((align & 0x3) != 0);
32501 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32502 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32503 if (length >= nelt_v16)
32504 {
32505 mode = V16QImode;
32506 gen_func = gen_movmisalignv16qi;
32507 }
32508 else
32509 {
32510 mode = V8QImode;
32511 gen_func = gen_movmisalignv8qi;
32512 }
32513 nelt_mode = GET_MODE_NUNITS (mode);
32514 gcc_assert (length >= nelt_mode);
32515 /* Skip if it isn't profitable. */
32516 if (!arm_block_set_vect_profit_p (length, align, mode))
32517 return false;
32518
32519 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32520 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32521
32522 v = sext_hwi (v, BITS_PER_WORD);
32523
32524 reg = gen_reg_rtx (mode);
32525 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32526 /* Emit instruction loading the constant value. */
32527 emit_move_insn (reg, val_vec);
32528
32529 /* Handle nelt_mode bytes in a vector. */
32530 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32531 {
32532 emit_insn ((*gen_func) (mem, reg));
32533 if (i + 2 * nelt_mode <= length)
32534 {
32535 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32536 offset += nelt_mode;
32537 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32538 }
32539 }
32540
32541 /* If at least nelt_v8 bytes are left over, we must be in
32542 V16QImode. */
32543 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32544
32545 /* Handle (8, 16) bytes leftover. */
32546 if (i + nelt_v8 < length)
32547 {
32548 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32549 offset += length - i;
32550 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32551
32552 /* We are shifting bytes back, set the alignment accordingly. */
32553 if ((length & 1) != 0 && align >= 2)
32554 set_mem_align (mem, BITS_PER_UNIT);
32555
32556 emit_insn (gen_movmisalignv16qi (mem, reg));
32557 }
32558 /* Handle (0, 8] bytes leftover. */
32559 else if (i < length && i + nelt_v8 >= length)
32560 {
32561 if (mode == V16QImode)
32562 reg = gen_lowpart (V8QImode, reg);
32563
32564 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32565 + (nelt_mode - nelt_v8))));
32566 offset += (length - i) + (nelt_mode - nelt_v8);
32567 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32568
32569 /* We are shifting bytes back, set the alignment accordingly. */
32570 if ((length & 1) != 0 && align >= 2)
32571 set_mem_align (mem, BITS_PER_UNIT);
32572
32573 emit_insn (gen_movmisalignv8qi (mem, reg));
32574 }
32575
32576 return true;
32577 }
32578
32579 /* Set a block of memory using vectorization instructions for the
32580 aligned case. We fill the first LENGTH bytes of the memory area
32581 starting from DSTBASE with byte constant VALUE. ALIGN is the
32582 alignment requirement of memory. Return TRUE if succeeded. */
32583 static bool
32584 arm_block_set_aligned_vect (rtx dstbase,
32585 unsigned HOST_WIDE_INT length,
32586 unsigned HOST_WIDE_INT value,
32587 unsigned HOST_WIDE_INT align)
32588 {
32589 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32590 rtx dst, addr, mem;
32591 rtx val_vec, reg;
32592 machine_mode mode;
32593 unsigned int offset = 0;
32594
32595 gcc_assert ((align & 0x3) == 0);
32596 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32597 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32598 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32599 mode = V16QImode;
32600 else
32601 mode = V8QImode;
32602
32603 nelt_mode = GET_MODE_NUNITS (mode);
32604 gcc_assert (length >= nelt_mode);
32605 /* Skip if it isn't profitable. */
32606 if (!arm_block_set_vect_profit_p (length, align, mode))
32607 return false;
32608
32609 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32610
32611 reg = gen_reg_rtx (mode);
32612 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32613 /* Emit instruction loading the constant value. */
32614 emit_move_insn (reg, val_vec);
32615
32616 i = 0;
32617 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32618 if (mode == V16QImode)
32619 {
32620 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32621 emit_insn (gen_movmisalignv16qi (mem, reg));
32622 i += nelt_mode;
32623 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32624 if (i + nelt_v8 < length && i + nelt_v16 > length)
32625 {
32626 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32627 offset += length - nelt_mode;
32628 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32629 /* We are shifting bytes back, set the alignment accordingly. */
32630 if ((length & 0x3) == 0)
32631 set_mem_align (mem, BITS_PER_UNIT * 4);
32632 else if ((length & 0x1) == 0)
32633 set_mem_align (mem, BITS_PER_UNIT * 2);
32634 else
32635 set_mem_align (mem, BITS_PER_UNIT);
32636
32637 emit_insn (gen_movmisalignv16qi (mem, reg));
32638 return true;
32639 }
32640 /* Fall through for bytes leftover. */
32641 mode = V8QImode;
32642 nelt_mode = GET_MODE_NUNITS (mode);
32643 reg = gen_lowpart (V8QImode, reg);
32644 }
32645
32646 /* Handle 8 bytes in a vector. */
32647 for (; (i + nelt_mode <= length); i += nelt_mode)
32648 {
32649 addr = plus_constant (Pmode, dst, i);
32650 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32651 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32652 emit_move_insn (mem, reg);
32653 else
32654 emit_insn (gen_unaligned_storev8qi (mem, reg));
32655 }
32656
32657 /* Handle single word leftover by shifting 4 bytes back. We can
32658 use aligned access for this case. */
32659 if (i + UNITS_PER_WORD == length)
32660 {
32661 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32662 offset += i - UNITS_PER_WORD;
32663 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32664 /* We are shifting 4 bytes back, set the alignment accordingly. */
32665 if (align > UNITS_PER_WORD)
32666 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32667
32668 emit_insn (gen_unaligned_storev8qi (mem, reg));
32669 }
32670 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32671 We have to use unaligned access for this case. */
32672 else if (i < length)
32673 {
32674 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32675 offset += length - nelt_mode;
32676 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32677 /* We are shifting bytes back, set the alignment accordingly. */
32678 if ((length & 1) == 0)
32679 set_mem_align (mem, BITS_PER_UNIT * 2);
32680 else
32681 set_mem_align (mem, BITS_PER_UNIT);
32682
32683 emit_insn (gen_movmisalignv8qi (mem, reg));
32684 }
32685
32686 return true;
32687 }
32688
32689 /* Set a block of memory using plain strh/strb instructions, only
32690 using instructions allowed by ALIGN on the processor. We fill the
32691 first LENGTH bytes of the memory area starting from DSTBASE
32692 with byte constant VALUE. ALIGN is the alignment requirement
32693 of memory. */
32694 static bool
32695 arm_block_set_unaligned_non_vect (rtx dstbase,
32696 unsigned HOST_WIDE_INT length,
32697 unsigned HOST_WIDE_INT value,
32698 unsigned HOST_WIDE_INT align)
32699 {
32700 unsigned int i;
32701 rtx dst, addr, mem;
32702 rtx val_exp, val_reg, reg;
32703 machine_mode mode;
32704 HOST_WIDE_INT v = value;
32705
32706 gcc_assert (align == 1 || align == 2);
32707
32708 if (align == 2)
32709 v |= (value << BITS_PER_UNIT);
32710
32711 v = sext_hwi (v, BITS_PER_WORD);
32712 val_exp = GEN_INT (v);
32713 /* Skip if it isn't profitable. */
32714 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32715 align, true, false))
32716 return false;
32717
32718 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32719 mode = (align == 2 ? HImode : QImode);
32720 val_reg = force_reg (SImode, val_exp);
32721 reg = gen_lowpart (mode, val_reg);
32722
32723 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32724 {
32725 addr = plus_constant (Pmode, dst, i);
32726 mem = adjust_automodify_address (dstbase, mode, addr, i);
32727 emit_move_insn (mem, reg);
32728 }
32729
32730 /* Handle single byte leftover. */
32731 if (i + 1 == length)
32732 {
32733 reg = gen_lowpart (QImode, val_reg);
32734 addr = plus_constant (Pmode, dst, i);
32735 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32736 emit_move_insn (mem, reg);
32737 i++;
32738 }
32739
32740 gcc_assert (i == length);
32741 return true;
32742 }
32743
32744 /* Set a block of memory using plain strd/str/strh/strb instructions,
32745 to permit unaligned stores on processors which support unaligned
32746 semantics for those instructions. We fill the first LENGTH bytes
32747 of the memory area starting from DSTBASE with byte constant VALUE.
32748 ALIGN is the alignment requirement of memory. */
32749 static bool
32750 arm_block_set_aligned_non_vect (rtx dstbase,
32751 unsigned HOST_WIDE_INT length,
32752 unsigned HOST_WIDE_INT value,
32753 unsigned HOST_WIDE_INT align)
32754 {
32755 unsigned int i;
32756 rtx dst, addr, mem;
32757 rtx val_exp, val_reg, reg;
32758 unsigned HOST_WIDE_INT v;
32759 bool use_strd_p;
32760
32761 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32762 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32763
32764 v = (value | (value << 8) | (value << 16) | (value << 24));
32765 if (length < UNITS_PER_WORD)
32766 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32767
32768 if (use_strd_p)
32769 v |= (v << BITS_PER_WORD);
32770 else
32771 v = sext_hwi (v, BITS_PER_WORD);
32772
32773 val_exp = GEN_INT (v);
32774 /* Skip if it isn't profitable. */
32775 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32776 align, false, use_strd_p))
32777 {
32778 if (!use_strd_p)
32779 return false;
32780
32781 /* Try without strd. */
32782 v = (v >> BITS_PER_WORD);
32783 v = sext_hwi (v, BITS_PER_WORD);
32784 val_exp = GEN_INT (v);
32785 use_strd_p = false;
32786 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32787 align, false, use_strd_p))
32788 return false;
32789 }
32790
32791 i = 0;
32792 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32793 /* Handle double words using strd if possible. */
32794 if (use_strd_p)
32795 {
32796 val_reg = force_reg (DImode, val_exp);
32797 reg = val_reg;
32798 for (; (i + 8 <= length); i += 8)
32799 {
32800 addr = plus_constant (Pmode, dst, i);
32801 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32802 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32803 emit_move_insn (mem, reg);
32804 else
32805 emit_insn (gen_unaligned_storedi (mem, reg));
32806 }
32807 }
32808 else
32809 val_reg = force_reg (SImode, val_exp);
32810
32811 /* Handle words. */
32812 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32813 for (; (i + 4 <= length); i += 4)
32814 {
32815 addr = plus_constant (Pmode, dst, i);
32816 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32817 if ((align & 3) == 0)
32818 emit_move_insn (mem, reg);
32819 else
32820 emit_insn (gen_unaligned_storesi (mem, reg));
32821 }
32822
32823 /* Merge last pair of STRH and STRB into a STR if possible. */
32824 if (unaligned_access && i > 0 && (i + 3) == length)
32825 {
32826 addr = plus_constant (Pmode, dst, i - 1);
32827 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32828 /* We are shifting one byte back, set the alignment accordingly. */
32829 if ((align & 1) == 0)
32830 set_mem_align (mem, BITS_PER_UNIT);
32831
32832 /* Most likely this is an unaligned access, and we can't tell at
32833 compilation time. */
32834 emit_insn (gen_unaligned_storesi (mem, reg));
32835 return true;
32836 }
32837
32838 /* Handle half word leftover. */
32839 if (i + 2 <= length)
32840 {
32841 reg = gen_lowpart (HImode, val_reg);
32842 addr = plus_constant (Pmode, dst, i);
32843 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32844 if ((align & 1) == 0)
32845 emit_move_insn (mem, reg);
32846 else
32847 emit_insn (gen_unaligned_storehi (mem, reg));
32848
32849 i += 2;
32850 }
32851
32852 /* Handle single byte leftover. */
32853 if (i + 1 == length)
32854 {
32855 reg = gen_lowpart (QImode, val_reg);
32856 addr = plus_constant (Pmode, dst, i);
32857 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32858 emit_move_insn (mem, reg);
32859 }
32860
32861 return true;
32862 }
32863
32864 /* Set a block of memory using vectorization instructions for both
32865 aligned and unaligned cases. We fill the first LENGTH bytes of
32866 the memory area starting from DSTBASE with byte constant VALUE.
32867 ALIGN is the alignment requirement of memory. */
32868 static bool
32869 arm_block_set_vect (rtx dstbase,
32870 unsigned HOST_WIDE_INT length,
32871 unsigned HOST_WIDE_INT value,
32872 unsigned HOST_WIDE_INT align)
32873 {
32874 /* Check whether we need to use unaligned store instruction. */
32875 if (((align & 3) != 0 || (length & 3) != 0)
32876 /* Check whether unaligned store instruction is available. */
32877 && (!unaligned_access || BYTES_BIG_ENDIAN))
32878 return false;
32879
32880 if ((align & 3) == 0)
32881 return arm_block_set_aligned_vect (dstbase, length, value, align);
32882 else
32883 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32884 }
32885
32886 /* Expand a string store operation. First we try to do it using
32887 vectorization instructions, then with ARM unaligned access and
32888 double-word stores if profitable. OPERANDS[0] is the destination,
32889 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32890 initialize the memory with, OPERANDS[3] is the known alignment of the
32891 destination. */
32892 bool
32893 arm_gen_setmem (rtx *operands)
32894 {
32895 rtx dstbase = operands[0];
32896 unsigned HOST_WIDE_INT length;
32897 unsigned HOST_WIDE_INT value;
32898 unsigned HOST_WIDE_INT align;
32899
32900 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32901 return false;
32902
32903 length = UINTVAL (operands[1]);
32904 if (length > 64)
32905 return false;
32906
32907 value = (UINTVAL (operands[2]) & 0xFF);
32908 align = UINTVAL (operands[3]);
32909 if (TARGET_NEON && length >= 8
32910 && current_tune->string_ops_prefer_neon
32911 && arm_block_set_vect (dstbase, length, value, align))
32912 return true;
32913
32914 if (!unaligned_access && (align & 3) != 0)
32915 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32916
32917 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32918 }
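/* For instance, a call such as memset (p, 0xab, 15) with a word-aligned P
   reaches this expander with OPERANDS[1] == 15, OPERANDS[2] == 0xab and
   OPERANDS[3] == 4; lengths above 64 bytes are rejected here and left to
   the generic expansion.  */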
32919
32920
32921 static bool
32922 arm_macro_fusion_p (void)
32923 {
32924 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32925 }
32926
32927 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32928 for MOVW / MOVT macro fusion. */
32929
32930 static bool
32931 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32932 {
32933 /* We are trying to fuse
32934 movw imm / movt imm
32935 instructions as a group that gets scheduled together. */
32936
32937 rtx set_dest = SET_DEST (curr_set);
32938
32939 if (GET_MODE (set_dest) != SImode)
32940 return false;
32941
32942 /* We are trying to match:
32943 prev (movw) == (set (reg r0) (const_int imm16))
32944 curr (movt) == (set (zero_extract (reg r0)
32945 (const_int 16)
32946 (const_int 16))
32947 (const_int imm16_1))
32948 or
32949 prev (movw) == (set (reg r1)
32950 (high (symbol_ref ("SYM"))))
32951 curr (movt) == (set (reg r0)
32952 (lo_sum (reg r1)
32953 (symbol_ref ("SYM")))) */
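  /* In instruction terms this is a back-to-back pair such as

	 movw r0, #:lower16:SYM
	 movt r0, #:upper16:SYM

     (or the two immediate halves of a 32-bit constant).  */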
32954
32955 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32956 {
32957 if (CONST_INT_P (SET_SRC (curr_set))
32958 && CONST_INT_P (SET_SRC (prev_set))
32959 && REG_P (XEXP (set_dest, 0))
32960 && REG_P (SET_DEST (prev_set))
32961 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32962 return true;
32963
32964 }
32965 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32966 && REG_P (SET_DEST (curr_set))
32967 && REG_P (SET_DEST (prev_set))
32968 && GET_CODE (SET_SRC (prev_set)) == HIGH
32969 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32970 return true;
32971
32972 return false;
32973 }
32974
32975 static bool
32976 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32977 {
32978 rtx prev_set = single_set (prev);
32979 rtx curr_set = single_set (curr);
32980
32981 if (!prev_set
32982 || !curr_set)
32983 return false;
32984
32985 if (any_condjump_p (curr))
32986 return false;
32987
32988 if (!arm_macro_fusion_p ())
32989 return false;
32990
32991 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32992 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32993 return true;
32994
32995 return false;
32996 }
32997
32998 /* Return true iff the instruction fusion described by OP is enabled. */
32999 bool
33000 arm_fusion_enabled_p (tune_params::fuse_ops op)
33001 {
33002 return current_tune->fusible_ops & op;
33003 }
33004
33005 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33006 scheduled for speculative execution. Reject the long-running division
33007 and square-root instructions. */
33008
33009 static bool
33010 arm_sched_can_speculate_insn (rtx_insn *insn)
33011 {
33012 switch (get_attr_type (insn))
33013 {
33014 case TYPE_SDIV:
33015 case TYPE_UDIV:
33016 case TYPE_FDIVS:
33017 case TYPE_FDIVD:
33018 case TYPE_FSQRTS:
33019 case TYPE_FSQRTD:
33020 case TYPE_NEON_FP_SQRT_S:
33021 case TYPE_NEON_FP_SQRT_D:
33022 case TYPE_NEON_FP_SQRT_S_Q:
33023 case TYPE_NEON_FP_SQRT_D_Q:
33024 case TYPE_NEON_FP_DIV_S:
33025 case TYPE_NEON_FP_DIV_D:
33026 case TYPE_NEON_FP_DIV_S_Q:
33027 case TYPE_NEON_FP_DIV_D_Q:
33028 return false;
33029 default:
33030 return true;
33031 }
33032 }
33033
33034 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33035
33036 static unsigned HOST_WIDE_INT
33037 arm_asan_shadow_offset (void)
33038 {
33039 return HOST_WIDE_INT_1U << 29;
33040 }
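/* With this offset the usual ASan translation is roughly
   shadow = (addr >> 3) + 0x20000000 within the 32-bit address space.  */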
33041
33042
33043 /* This is a temporary fix for PR60655. Ideally we need
33044 to handle most of these cases in the generic part but
33045 currently we reject minus (..) (sym_ref). We try to
33046 ameliorate the case with minus (sym_ref1) (sym_ref2)
33047 where they are in the same section. */
33048
33049 static bool
33050 arm_const_not_ok_for_debug_p (rtx p)
33051 {
33052 tree decl_op0 = NULL;
33053 tree decl_op1 = NULL;
33054
33055 if (GET_CODE (p) == UNSPEC)
33056 return true;
33057 if (GET_CODE (p) == MINUS)
33058 {
33059 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33060 {
33061 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33062 if (decl_op1
33063 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33064 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33065 {
33066 if ((VAR_P (decl_op1)
33067 || TREE_CODE (decl_op1) == CONST_DECL)
33068 && (VAR_P (decl_op0)
33069 || TREE_CODE (decl_op0) == CONST_DECL))
33070 return (get_variable_section (decl_op1, false)
33071 != get_variable_section (decl_op0, false));
33072
33073 if (TREE_CODE (decl_op1) == LABEL_DECL
33074 && TREE_CODE (decl_op0) == LABEL_DECL)
33075 return (DECL_CONTEXT (decl_op1)
33076 != DECL_CONTEXT (decl_op0));
33077 }
33078
33079 return true;
33080 }
33081 }
33082
33083 return false;
33084 }
33085
33086 /* Return TRUE if X is a reference to a value in a constant pool. */
33087 extern bool
33088 arm_is_constant_pool_ref (rtx x)
33089 {
33090 return (MEM_P (x)
33091 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33092 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33093 }
33094
33095 /* Remember the last target of arm_set_current_function. */
33096 static GTY(()) tree arm_previous_fndecl;
33097
33098 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33099
33100 void
33101 save_restore_target_globals (tree new_tree)
33102 {
33103 /* If we have a previous state, use it. */
33104 if (TREE_TARGET_GLOBALS (new_tree))
33105 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33106 else if (new_tree == target_option_default_node)
33107 restore_target_globals (&default_target_globals);
33108 else
33109 {
33110 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33111 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33112 }
33113
33114 arm_option_params_internal ();
33115 }
33116
33117 /* Invalidate arm_previous_fndecl. */
33118
33119 void
33120 arm_reset_previous_fndecl (void)
33121 {
33122 arm_previous_fndecl = NULL_TREE;
33123 }
33124
33125 /* Establish appropriate back-end context for processing the function
33126 FNDECL. The argument might be NULL to indicate processing at top
33127 level, outside of any function scope. */
33128
33129 static void
33130 arm_set_current_function (tree fndecl)
33131 {
33132 if (!fndecl || fndecl == arm_previous_fndecl)
33133 return;
33134
33135 tree old_tree = (arm_previous_fndecl
33136 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33137 : NULL_TREE);
33138
33139 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33140
33141 /* If current function has no attributes but previous one did,
33142 use the default node. */
33143 if (! new_tree && old_tree)
33144 new_tree = target_option_default_node;
33145
33146 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33147 the default have been handled by save_restore_target_globals from
33148 arm_pragma_target_parse. */
33149 if (old_tree == new_tree)
33150 return;
33151
33152 arm_previous_fndecl = fndecl;
33153
33154 /* First set the target options. */
33155 cl_target_option_restore (&global_options, &global_options_set,
33156 TREE_TARGET_OPTION (new_tree));
33157
33158 save_restore_target_globals (new_tree);
33159
33160 arm_override_options_after_change_1 (&global_options, &global_options_set);
33161 }
33162
33163 /* Implement TARGET_OPTION_PRINT. */
33164
33165 static void
33166 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33167 {
33168 int flags = ptr->x_target_flags;
33169 const char *fpu_name;
33170
33171 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33172 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33173
33174 fprintf (file, "%*sselected isa %s\n", indent, "",
33175 TARGET_THUMB2_P (flags) ? "thumb2" :
33176 TARGET_THUMB_P (flags) ? "thumb1" :
33177 "arm");
33178
33179 if (ptr->x_arm_arch_string)
33180 fprintf (file, "%*sselected architecture %s\n", indent, "",
33181 ptr->x_arm_arch_string);
33182
33183 if (ptr->x_arm_cpu_string)
33184 fprintf (file, "%*sselected CPU %s\n", indent, "",
33185 ptr->x_arm_cpu_string);
33186
33187 if (ptr->x_arm_tune_string)
33188 fprintf (file, "%*sselected tune %s\n", indent, "",
33189 ptr->x_arm_tune_string);
33190
33191 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33192 }
33193
33194 /* Hook to determine if one function can safely inline another. */
33195
33196 static bool
33197 arm_can_inline_p (tree caller, tree callee)
33198 {
33199 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33200 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33201 bool can_inline = true;
33202
33203 struct cl_target_option *caller_opts
33204 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33205 : target_option_default_node);
33206
33207 struct cl_target_option *callee_opts
33208 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33209 : target_option_default_node);
33210
33211 if (callee_opts == caller_opts)
33212 return true;
33213
33214 /* Callee's ISA features should be a subset of the caller's. */
33215 struct arm_build_target caller_target;
33216 struct arm_build_target callee_target;
33217 caller_target.isa = sbitmap_alloc (isa_num_bits);
33218 callee_target.isa = sbitmap_alloc (isa_num_bits);
33219
33220 arm_configure_build_target (&caller_target, caller_opts, false);
33221 arm_configure_build_target (&callee_target, callee_opts, false);
33222 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33223 can_inline = false;
33224
33225 sbitmap_free (caller_target.isa);
33226 sbitmap_free (callee_target.isa);
33227
33228 /* OK to inline between different modes.
33229 Functions with mode-specific instructions, e.g. using asm,
33230 must be explicitly protected with noinline. */
33231 return can_inline;
33232 }
33233
33234 /* Hook to fix function's alignment affected by target attribute. */
33235
33236 static void
33237 arm_relayout_function (tree fndecl)
33238 {
33239 if (DECL_USER_ALIGN (fndecl))
33240 return;
33241
33242 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33243
33244 if (!callee_tree)
33245 callee_tree = target_option_default_node;
33246
33247 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33248 SET_DECL_ALIGN
33249 (fndecl,
33250 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33251 }
33252
33253 /* Inner function to process the attribute((target(...))): take an argument and
33254 set the current options from that argument. If we have a list, recursively
33255 go over the list. */
33256
33257 static bool
33258 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33259 {
33260 if (TREE_CODE (args) == TREE_LIST)
33261 {
33262 bool ret = true;
33263
33264 for (; args; args = TREE_CHAIN (args))
33265 if (TREE_VALUE (args)
33266 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33267 ret = false;
33268 return ret;
33269 }
33270
33271 else if (TREE_CODE (args) != STRING_CST)
33272 {
33273 error ("attribute %<target%> argument not a string");
33274 return false;
33275 }
33276
33277 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33278 char *q;
33279
33280 while ((q = strtok (argstr, ",")) != NULL)
33281 {
33282 argstr = NULL;
33283 if (!strcmp (q, "thumb"))
33284 {
33285 opts->x_target_flags |= MASK_THUMB;
33286 if (TARGET_FDPIC && !arm_arch_thumb2)
33287 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33288 }
33289
33290 else if (!strcmp (q, "arm"))
33291 opts->x_target_flags &= ~MASK_THUMB;
33292
33293 else if (!strcmp (q, "general-regs-only"))
33294 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33295
33296 else if (startswith (q, "fpu="))
33297 {
33298 int fpu_index;
33299 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33300 &fpu_index, CL_TARGET))
33301 {
33302 error ("invalid fpu for target attribute or pragma %qs", q);
33303 return false;
33304 }
33305 if (fpu_index == TARGET_FPU_auto)
33306 {
33307 /* This doesn't really make sense until we support
33308 general dynamic selection of the architecture and all
33309 sub-features. */
33310 sorry ("auto fpu selection not currently permitted here");
33311 return false;
33312 }
33313 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33314 }
33315 else if (startswith (q, "arch="))
33316 {
33317 char *arch = q + 5;
33318 const arch_option *arm_selected_arch
33319 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33320
33321 if (!arm_selected_arch)
33322 {
33323 error ("invalid architecture for target attribute or pragma %qs",
33324 q);
33325 return false;
33326 }
33327
33328 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33329 }
33330 else if (q[0] == '+')
33331 {
33332 opts->x_arm_arch_string
33333 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33334 }
33335 else
33336 {
33337 error ("unknown target attribute or pragma %qs", q);
33338 return false;
33339 }
33340 }
33341
33342 return true;
33343 }
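/* For instance, __attribute__((target("thumb,fpu=vfpv3"))) is split on
   commas here: "thumb" sets MASK_THUMB and "fpu=vfpv3" selects the named
   FPU, while a token starting with '+' (e.g. "+crc") is appended to the
   current architecture string.  */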
33344
33345 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33346
33347 tree
33348 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33349 struct gcc_options *opts_set)
33350 {
33351 struct cl_target_option cl_opts;
33352
33353 if (!arm_valid_target_attribute_rec (args, opts))
33354 return NULL_TREE;
33355
33356 cl_target_option_save (&cl_opts, opts, opts_set);
33357 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33358 arm_option_check_internal (opts);
33359 /* Do any overrides, such as global options arch=xxx.
33360 We do this since arm_active_target was overridden. */
33361 arm_option_reconfigure_globals ();
33362 arm_options_perform_arch_sanity_checks ();
33363 arm_option_override_internal (opts, opts_set);
33364
33365 return build_target_option_node (opts, opts_set);
33366 }
33367
33368 static void
33369 add_attribute (const char * mode, tree *attributes)
33370 {
33371 size_t len = strlen (mode);
33372 tree value = build_string (len, mode);
33373
33374 TREE_TYPE (value) = build_array_type (char_type_node,
33375 build_index_type (size_int (len)));
33376
33377 *attributes = tree_cons (get_identifier ("target"),
33378 build_tree_list (NULL_TREE, value),
33379 *attributes);
33380 }
33381
33382 /* For testing. Insert thumb or arm modes alternately on functions. */
33383
33384 static void
33385 arm_insert_attributes (tree fndecl, tree * attributes)
33386 {
33387 const char *mode;
33388
33389 if (! TARGET_FLIP_THUMB)
33390 return;
33391
33392 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33393 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33394 return;
33395
33396 /* Nested definitions must inherit mode. */
33397 if (current_function_decl)
33398 {
33399 mode = TARGET_THUMB ? "thumb" : "arm";
33400 add_attribute (mode, attributes);
33401 return;
33402 }
33403
33404 /* If there is already a setting don't change it. */
33405 if (lookup_attribute ("target", *attributes) != NULL)
33406 return;
33407
33408 mode = thumb_flipper ? "thumb" : "arm";
33409 add_attribute (mode, attributes);
33410
33411 thumb_flipper = !thumb_flipper;
33412 }
33413
33414 /* Hook to validate attribute((target("string"))). */
33415
33416 static bool
33417 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33418 tree args, int ARG_UNUSED (flags))
33419 {
33420 bool ret = true;
33421 struct gcc_options func_options, func_options_set;
33422 tree cur_tree, new_optimize;
33423 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33424
33425 /* Get the optimization options of the current function. */
33426 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33427
33428 /* If the function changed the optimization levels as well as setting target
33429 options, start with the optimizations specified. */
33430 if (!func_optimize)
33431 func_optimize = optimization_default_node;
33432
33433 /* Init func_options. */
33434 memset (&func_options, 0, sizeof (func_options));
33435 init_options_struct (&func_options, NULL);
33436 lang_hooks.init_options_struct (&func_options);
33437 memset (&func_options_set, 0, sizeof (func_options_set));
33438
33439 /* Initialize func_options to the defaults. */
33440 cl_optimization_restore (&func_options, &func_options_set,
33441 TREE_OPTIMIZATION (func_optimize));
33442
33443 cl_target_option_restore (&func_options, &func_options_set,
33444 TREE_TARGET_OPTION (target_option_default_node));
33445
33446 /* Set func_options flags with new target mode. */
33447 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33448 &func_options_set);
33449
33450 if (cur_tree == NULL_TREE)
33451 ret = false;
33452
33453 new_optimize = build_optimization_node (&func_options, &func_options_set);
33454
33455 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33456
33457 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33458
33459 return ret;
33460 }
33461
33462 /* Match an ISA feature bitmap to a named FPU. We always use the
33463 first entry that exactly matches the feature set, so that we
33464 effectively canonicalize the FPU name for the assembler. */
33465 static const char*
33466 arm_identify_fpu_from_isa (sbitmap isa)
33467 {
33468 auto_sbitmap fpubits (isa_num_bits);
33469 auto_sbitmap cand_fpubits (isa_num_bits);
33470
33471 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33472
33473 /* If there are no ISA feature bits relating to the FPU, we must be
33474 doing soft-float. */
33475 if (bitmap_empty_p (fpubits))
33476 return "softvfp";
33477
33478 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33479 {
33480 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33481 if (bitmap_equal_p (fpubits, cand_fpubits))
33482 return all_fpus[i].name;
33483 }
33484 /* We must find an entry, or things have gone wrong. */
33485 gcc_unreachable ();
33486 }
33487
33488 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33489 by the function fndecl. */
33490 void
33491 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33492 {
33493 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33494
33495 struct cl_target_option *targ_options;
33496 if (target_parts)
33497 targ_options = TREE_TARGET_OPTION (target_parts);
33498 else
33499 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33500 gcc_assert (targ_options);
33501
33502 arm_print_asm_arch_directives (stream, targ_options);
33503
33504 fprintf (stream, "\t.syntax unified\n");
33505
33506 if (TARGET_THUMB)
33507 {
33508 if (is_called_in_ARM_mode (decl)
33509 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33510 && cfun->is_thunk))
33511 fprintf (stream, "\t.code 32\n");
33512 else if (TARGET_THUMB1)
33513 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33514 else
33515 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33516 }
33517 else
33518 fprintf (stream, "\t.arm\n");
33519
33520 if (TARGET_POKE_FUNCTION_NAME)
33521 arm_poke_function_name (stream, (const char *) name);
33522 }
33523
33524 /* If MEM is in the form of [base+offset], extract the two parts
33525 of the address and store them in BASE and OFFSET; otherwise return false
33526 after clearing BASE and OFFSET. */
33527
33528 static bool
33529 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33530 {
33531 rtx addr;
33532
33533 gcc_assert (MEM_P (mem));
33534
33535 addr = XEXP (mem, 0);
33536
33537 /* Strip off const from addresses like (const (addr)). */
33538 if (GET_CODE (addr) == CONST)
33539 addr = XEXP (addr, 0);
33540
33541 if (REG_P (addr))
33542 {
33543 *base = addr;
33544 *offset = const0_rtx;
33545 return true;
33546 }
33547
33548 if (GET_CODE (addr) == PLUS
33549 && GET_CODE (XEXP (addr, 0)) == REG
33550 && CONST_INT_P (XEXP (addr, 1)))
33551 {
33552 *base = XEXP (addr, 0);
33553 *offset = XEXP (addr, 1);
33554 return true;
33555 }
33556
33557 *base = NULL_RTX;
33558 *offset = NULL_RTX;
33559
33560 return false;
33561 }
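/* E.g. for a MEM whose address is (plus (reg r1) (const_int 8)) this sets
   *BASE to the register and *OFFSET to (const_int 8); a bare register
   address yields that register with a zero offset.  */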
33562
33563 /* If INSN is a load or store whose address has the form [base+offset],
33564 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
33565 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33566 otherwise return FALSE. */
33567
33568 static bool
33569 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33570 {
33571 rtx x, dest, src;
33572
33573 gcc_assert (INSN_P (insn));
33574 x = PATTERN (insn);
33575 if (GET_CODE (x) != SET)
33576 return false;
33577
33578 src = SET_SRC (x);
33579 dest = SET_DEST (x);
33580 if (REG_P (src) && MEM_P (dest))
33581 {
33582 *is_load = false;
33583 extract_base_offset_in_addr (dest, base, offset);
33584 }
33585 else if (MEM_P (src) && REG_P (dest))
33586 {
33587 *is_load = true;
33588 extract_base_offset_in_addr (src, base, offset);
33589 }
33590 else
33591 return false;
33592
33593 return (*base != NULL_RTX && *offset != NULL_RTX);
33594 }
33595
33596 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33597
33598 Currently we only support fusing ldr or str instructions, so FUSION_PRI
33599 and PRI are only calculated for these instructions. For other instructions,
33600 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
33601 of instruction fusion can be supported by returning different priorities.
33602
33603 It's important that irrelevant instructions get the largest FUSION_PRI. */
33604
33605 static void
33606 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33607 int *fusion_pri, int *pri)
33608 {
33609 int tmp, off_val;
33610 bool is_load;
33611 rtx base, offset;
33612
33613 gcc_assert (INSN_P (insn));
33614
33615 tmp = max_pri - 1;
33616 if (!fusion_load_store (insn, &base, &offset, &is_load))
33617 {
33618 *pri = tmp;
33619 *fusion_pri = tmp;
33620 return;
33621 }
33622
33623 /* Load goes first. */
33624 if (is_load)
33625 *fusion_pri = tmp - 1;
33626 else
33627 *fusion_pri = tmp - 2;
33628
33629 tmp /= 2;
33630
33631 /* INSN with smaller base register goes first. */
33632 tmp -= ((REGNO (base) & 0xff) << 20);
33633
33634 /* INSN with smaller offset goes first. */
33635 off_val = (int)(INTVAL (offset));
33636 if (off_val >= 0)
33637 tmp -= (off_val & 0xfffff);
33638 else
33639 tmp += ((- off_val) & 0xfffff);
33640
33641 *pri = tmp;
33642 return;
33643 }
33644
33645
33646 /* Construct and return a PARALLEL RTX vector with elements numbering the
33647 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33648 the vector - from the perspective of the architecture. This does not
33649 line up with GCC's perspective on lane numbers, so we end up with
33650 different masks depending on our target endian-ness. The diagram
33651 below may help. We must draw the distinction when building masks
33652 which select one half of the vector. An instruction selecting
33653 architectural low-lanes for a big-endian target, must be described using
33654 a mask selecting GCC high-lanes.
33655
33656 Big-Endian Little-Endian
33657
33658 GCC 0 1 2 3 3 2 1 0
33659 | x | x | x | x | | x | x | x | x |
33660 Architecture 3 2 1 0 3 2 1 0
33661
33662 Low Mask: { 2, 3 } { 0, 1 }
33663 High Mask: { 0, 1 } { 2, 3 }
33664 */
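/* Thus for V4SImode with HIGH == true this returns the PARALLEL
   [2 3] on a little-endian target but [0 1] on a big-endian one.  */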
33665
33666 rtx
33667 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33668 {
33669 int nunits = GET_MODE_NUNITS (mode);
33670 rtvec v = rtvec_alloc (nunits / 2);
33671 int high_base = nunits / 2;
33672 int low_base = 0;
33673 int base;
33674 rtx t1;
33675 int i;
33676
33677 if (BYTES_BIG_ENDIAN)
33678 base = high ? low_base : high_base;
33679 else
33680 base = high ? high_base : low_base;
33681
33682 for (i = 0; i < nunits / 2; i++)
33683 RTVEC_ELT (v, i) = GEN_INT (base + i);
33684
33685 t1 = gen_rtx_PARALLEL (mode, v);
33686 return t1;
33687 }
33688
33689 /* Check OP for validity as a PARALLEL RTX vector with elements
33690 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33691 from the perspective of the architecture. See the diagram above
33692 arm_simd_vect_par_cnst_half for more details. */
33693
33694 bool
33695 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33696 bool high)
33697 {
33698 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33699 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33700 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33701 int i = 0;
33702
33703 if (!VECTOR_MODE_P (mode))
33704 return false;
33705
33706 if (count_op != count_ideal)
33707 return false;
33708
33709 for (i = 0; i < count_ideal; i++)
33710 {
33711 rtx elt_op = XVECEXP (op, 0, i);
33712 rtx elt_ideal = XVECEXP (ideal, 0, i);
33713
33714 if (!CONST_INT_P (elt_op)
33715 || INTVAL (elt_ideal) != INTVAL (elt_op))
33716 return false;
33717 }
33718 return true;
33719 }
33720
33721 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33722 in Thumb1. */
33723 static bool
33724 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33725 const_tree)
33726 {
33727 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33728 if (vcall_offset && TARGET_THUMB1)
33729 return false;
33730
33731 /* Otherwise ok. */
33732 return true;
33733 }
33734
33735 /* Generate RTL for a conditional branch with rtx comparison CODE in
33736 mode CC_MODE. The destination of the unlikely conditional branch
33737 is LABEL_REF. */
33738
33739 void
33740 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33741 rtx label_ref)
33742 {
33743 rtx x;
33744 x = gen_rtx_fmt_ee (code, VOIDmode,
33745 gen_rtx_REG (cc_mode, CC_REGNUM),
33746 const0_rtx);
33747
33748 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33749 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33750 pc_rtx);
33751 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33752 }
33753
33754 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33755
33756 For pure-code sections there is no letter code for this attribute, so
33757 output all the section flags numerically when this is needed. */
33758
33759 static bool
33760 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33761 {
33762
33763 if (flags & SECTION_ARM_PURECODE)
33764 {
33765 *num = 0x20000000;
33766
33767 if (!(flags & SECTION_DEBUG))
33768 *num |= 0x2;
33769 if (flags & SECTION_EXCLUDE)
33770 *num |= 0x80000000;
33771 if (flags & SECTION_WRITE)
33772 *num |= 0x1;
33773 if (flags & SECTION_CODE)
33774 *num |= 0x4;
33775 if (flags & SECTION_MERGE)
33776 *num |= 0x10;
33777 if (flags & SECTION_STRINGS)
33778 *num |= 0x20;
33779 if (flags & SECTION_TLS)
33780 *num |= 0x400;
33781 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33782 *num |= 0x200;
33783
33784 return true;
33785 }
33786
33787 return false;
33788 }
33789
33790 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33791
33792 If pure-code is passed as an option, make sure all functions are in
33793 sections that have the SHF_ARM_PURECODE attribute. */
33794
33795 static section *
33796 arm_function_section (tree decl, enum node_frequency freq,
33797 bool startup, bool exit)
33798 {
33799 const char * section_name;
33800 section * sec;
33801
33802 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33803 return default_function_section (decl, freq, startup, exit);
33804
33805 if (!target_pure_code)
33806 return default_function_section (decl, freq, startup, exit);
33807
33808
33809 section_name = DECL_SECTION_NAME (decl);
33810
33811 /* If a function is not in a named section then it falls under the 'default'
33812 text section, also known as '.text'. We can preserve previous behavior as
33813 the default text section already has the SHF_ARM_PURECODE section
33814 attribute. */
33815 if (!section_name)
33816 {
33817 section *default_sec = default_function_section (decl, freq, startup,
33818 exit);
33819
33820 /* If default_sec is not null, then it must be a special section like for
33821 example .text.startup. We set the pure-code attribute and return the
33822 same section to preserve existing behavior. */
33823 if (default_sec)
33824 default_sec->common.flags |= SECTION_ARM_PURECODE;
33825 return default_sec;
33826 }
33827
33828 /* Otherwise look whether a section has already been created with
33829 'section_name'. */
33830 sec = get_named_section (decl, section_name, 0);
33831 if (!sec)
33832 /* If that is not the case passing NULL as the section's name to
33833 'get_named_section' will create a section with the declaration's
33834 section name. */
33835 sec = get_named_section (decl, NULL, 0);
33836
33837 /* Set the SHF_ARM_PURECODE attribute. */
33838 sec->common.flags |= SECTION_ARM_PURECODE;
33839
33840 return sec;
33841 }
33842
33843 /* Implements the TARGET_SECTION_FLAGS hook.
33844
33845 If DECL is a function declaration and pure-code is passed as an option
33846 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33847 section's name and RELOC indicates whether the declaration's initializer may
33848 contain runtime relocations. */
33849
33850 static unsigned int
33851 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33852 {
33853 unsigned int flags = default_section_type_flags (decl, name, reloc);
33854
33855 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33856 flags |= SECTION_ARM_PURECODE;
33857
33858 return flags;
33859 }
33860
33861 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33862
33863 static void
33864 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33865 rtx op0, rtx op1,
33866 rtx *quot_p, rtx *rem_p)
33867 {
33868 if (mode == SImode)
33869 gcc_assert (!TARGET_IDIV);
33870
33871 scalar_int_mode libval_mode
33872 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33873
33874 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33875 libval_mode, op0, mode, op1, mode);
33876
33877 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33878 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33879 GET_MODE_SIZE (mode));
33880
33881 gcc_assert (quotient);
33882 gcc_assert (remainder);
33883
33884 *quot_p = quotient;
33885 *rem_p = remainder;
33886 }
33887
33888 /* This function checks for the availability of the coprocessor builtin passed
33889 in BUILTIN for the current target. Returns true if it is available and
33890 false otherwise. If a BUILTIN is passed for which this function has not
33891 been implemented, it will cause an internal compiler error. */
33892
33893 bool
33894 arm_coproc_builtin_available (enum unspecv builtin)
33895 {
33896 /* None of these builtins are available in Thumb mode if the target only
33897 supports Thumb-1. */
33898 if (TARGET_THUMB1)
33899 return false;
33900
33901 switch (builtin)
33902 {
33903 case VUNSPEC_CDP:
33904 case VUNSPEC_LDC:
33905 case VUNSPEC_LDCL:
33906 case VUNSPEC_STC:
33907 case VUNSPEC_STCL:
33908 case VUNSPEC_MCR:
33909 case VUNSPEC_MRC:
33910 if (arm_arch4)
33911 return true;
33912 break;
33913 case VUNSPEC_CDP2:
33914 case VUNSPEC_LDC2:
33915 case VUNSPEC_LDC2L:
33916 case VUNSPEC_STC2:
33917 case VUNSPEC_STC2L:
33918 case VUNSPEC_MCR2:
33919 case VUNSPEC_MRC2:
33920 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33921 ARMv8-{A,M}. */
33922 if (arm_arch5t)
33923 return true;
33924 break;
33925 case VUNSPEC_MCRR:
33926 case VUNSPEC_MRRC:
33927 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33928 ARMv8-{A,M}. */
33929 if (arm_arch6 || arm_arch5te)
33930 return true;
33931 break;
33932 case VUNSPEC_MCRR2:
33933 case VUNSPEC_MRRC2:
33934 if (arm_arch6)
33935 return true;
33936 break;
33937 default:
33938 gcc_unreachable ();
33939 }
33940 return false;
33941 }
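
/* As an illustration, the ACLE coprocessor intrinsics __arm_cdp, __arm_mcr
   and __arm_mrc (VUNSPEC_CDP/MCR/MRC) are usable from ARMv4 onwards outside
   Thumb-1, while the *2 variants such as __arm_mcr2 require at least ARMv5T,
   and __arm_mcrr/__arm_mrrc require ARMv5TE or ARMv6. */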
33942
33943 /* This function returns true if OP is a valid memory operand for the ldc and
33944 stc coprocessor instructions and false otherwise. */
33945
33946 bool
33947 arm_coproc_ldc_stc_legitimate_address (rtx op)
33948 {
33949 HOST_WIDE_INT range;
33950 /* Has to be a memory operand. */
33951 if (!MEM_P (op))
33952 return false;
33953
33954 op = XEXP (op, 0);
33955
33956 /* We accept registers. */
33957 if (REG_P (op))
33958 return true;
33959
33960 switch (GET_CODE (op))
33961 {
33962 case PLUS:
33963 {
33964 /* Or registers with an offset. */
33965 if (!REG_P (XEXP (op, 0)))
33966 return false;
33967
33968 op = XEXP (op, 1);
33969
33970 /* The offset must be an immediate though. */
33971 if (!CONST_INT_P (op))
33972 return false;
33973
33974 range = INTVAL (op);
33975
33976 /* Within the range of [-1020,1020]. */
33977 if (!IN_RANGE (range, -1020, 1020))
33978 return false;
33979
33980 /* And a multiple of 4. */
33981 return (range % 4) == 0;
33982 }
33983 case PRE_INC:
33984 case POST_INC:
33985 case PRE_DEC:
33986 case POST_DEC:
33987 return REG_P (XEXP (op, 0));
33988 default:
33989 gcc_unreachable ();
33990 }
33991 return false;
33992 }
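
/* For illustration: register addresses such as [r0] and register-plus-offset
   addresses such as [r1, #8] or [r2, #-1020] are accepted; offsets that are
   not a multiple of four (e.g. [r1, #2]) or that lie outside [-1020, 1020]
   (e.g. [r1, #1024]) are rejected. PRE_INC/POST_INC/PRE_DEC/POST_DEC forms
   are accepted as long as they apply to a plain register. */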
33993
33994 /* Return the diagnostic message string if conversion from FROMTYPE to
33995 TOTYPE is not allowed, NULL otherwise. */
33996
33997 static const char *
33998 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33999 {
34000 if (element_mode (fromtype) != element_mode (totype))
34001 {
34002 /* Do not allow conversions to/from BFmode scalar types. */
34003 if (TYPE_MODE (fromtype) == BFmode)
34004 return N_("invalid conversion from type %<bfloat16_t%>");
34005 if (TYPE_MODE (totype) == BFmode)
34006 return N_("invalid conversion to type %<bfloat16_t%>");
34007 }
34008
34009 /* Conversion allowed. */
34010 return NULL;
34011 }
34012
34013 /* Return the diagnostic message string if the unary operation OP is
34014 not permitted on TYPE, NULL otherwise. */
34015
34016 static const char *
34017 arm_invalid_unary_op (int op, const_tree type)
34018 {
34019 /* Reject all single-operand operations on BFmode except for &. */
34020 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34021 return N_("operation not permitted on type %<bfloat16_t%>");
34022
34023 /* Operation allowed. */
34024 return NULL;
34025 }
34026
34027 /* Return the diagnostic message string if the binary operation OP is
34028 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34029
34030 static const char *
34031 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34032 const_tree type2)
34033 {
34034 /* Reject all 2-operand operations on BFmode. */
34035 if (element_mode (type1) == BFmode
34036 || element_mode (type2) == BFmode)
34037 return N_("operation not permitted on type %<bfloat16_t%>");
34038
34039 /* Operation allowed. */
34040 return NULL;
34041 }
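
/* Taken together with the two hooks above this means that, given
   bfloat16_t x, y, expressions such as x + y or (float) x are rejected
   with the diagnostics above, while taking the address (&x) or simply
   copying the value remains valid. */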
34042
34043 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34044
34045 In VFPv1, VFP registers could only be accessed in the mode they were
34046 set, so subregs would be invalid there. However, we don't support
34047 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34048
34049 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34050 VFP registers in little-endian order. We can't describe that accurately to
34051 GCC, so avoid taking subregs of such values.
34052
34053 The only exception is going from a 128-bit to a 64-bit type. In that
34054 case the data layout happens to be consistent for big-endian, so we
34055 explicitly allow that case. */
34056
34057 static bool
34058 arm_can_change_mode_class (machine_mode from, machine_mode to,
34059 reg_class_t rclass)
34060 {
34061 if (TARGET_BIG_END
34062 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34063 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34064 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34065 && reg_classes_intersect_p (VFP_REGS, rclass))
34066 return false;
34067 return true;
34068 }
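
/* Concretely, on a big-endian target this refuses to take e.g. an SImode
   subreg of a DFmode value that may live in a VFP register, since the two
   words are stored in the opposite order from what GCC's subreg model
   expects; the 128-bit to 64-bit case (e.g. a V2DImode to DImode subreg)
   happens to have a consistent layout and so is still permitted. */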
34069
34070 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34071 strcpy from constants will be faster. */
34072
34073 static HOST_WIDE_INT
34074 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34075 {
34076 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34077 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34078 return MAX (align, BITS_PER_WORD * factor);
34079 return align;
34080 }
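
/* For instance, when not optimizing for size a string literal is aligned
   to at least BITS_PER_WORD (32 bits), or to 64 bits when tuning for
   XScale in ARM state, so that word-at-a-time copies from it stay
   aligned. */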
34081
34082 /* Emit a speculation barrier on target architectures that do not have
34083 DSB/ISB directly. Such systems probably don't need a barrier
34084 themselves, but if the code is ever run on a later architecture, it
34085 might become a problem. */
34086 void
34087 arm_emit_speculation_barrier_function ()
34088 {
34089 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34090 }
34091
34092 /* Have we recorded an explicit access to the Q bit of APSR? */
34093 bool
34094 arm_q_bit_access (void)
34095 {
34096 if (cfun && cfun->decl)
34097 return lookup_attribute ("acle qbit",
34098 DECL_ATTRIBUTES (cfun->decl));
34099 return true;
34100 }
34101
34102 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34103 bool
34104 arm_ge_bits_access (void)
34105 {
34106 if (cfun && cfun->decl)
34107 return lookup_attribute ("acle gebits",
34108 DECL_ATTRIBUTES (cfun->decl));
34109 return true;
34110 }
34111
34112 /* Return NULL if insn INSN is valid within a low-overhead loop.
34113 Otherwise return a string explaining why doloop cannot be applied. */
34114
34115 static const char *
34116 arm_invalid_within_doloop (const rtx_insn *insn)
34117 {
34118 if (!TARGET_HAVE_LOB)
34119 return default_invalid_within_doloop (insn);
34120
34121 if (CALL_P (insn))
34122 return "Function call in the loop.";
34123
34124 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34125 return "LR is used inside loop.";
34126
34127 return NULL;
34128 }
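
/* In other words, on Armv8.1-M targets with the low-overhead-branch
   extension a loop is only turned into a DLS/LE loop when its body neither
   makes calls nor mentions LR, since the LE instruction uses LR as the
   loop counter. */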
34129
34130 bool
34131 arm_target_insn_ok_for_lob (rtx insn)
34132 {
34133 basic_block bb = BLOCK_FOR_INSN (insn);
34134 /* Make sure the basic block of the target insn is a simple latch
34135 whose single predecessor and single successor are the body of the loop
34136 itself. Only simple loops whose body is a single basic block are
34137 supported for low-overhead loops, which guarantees that the LE target is
34138 placed above the LE instruction itself in the generated code. */
34139
34140 return single_succ_p (bb)
34141 && single_pred_p (bb)
34142 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34143 && contains_no_active_insn_p (bb);
34144 }
34145
34146 #if CHECKING_P
34147 namespace selftest {
34148
34149 /* Scan the static data tables generated by parsecpu.awk looking for
34150 potential issues with the data. We primarily check for
34151 inconsistencies in the option extensions at present (extensions
34152 that duplicate others but aren't marked as aliases). Furthermore,
34153 for correct canonicalization later options must never be a subset
34154 of an earlier option. Any extension should also only specify other
34155 feature bits and never an architecture bit. The architecture is inferred
34156 from the declaration of the extension. */
34157 static void
34158 arm_test_cpu_arch_data (void)
34159 {
34160 const arch_option *arch;
34161 const cpu_option *cpu;
34162 auto_sbitmap target_isa (isa_num_bits);
34163 auto_sbitmap isa1 (isa_num_bits);
34164 auto_sbitmap isa2 (isa_num_bits);
34165
34166 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34167 {
34168 const cpu_arch_extension *ext1, *ext2;
34169
34170 if (arch->common.extensions == NULL)
34171 continue;
34172
34173 arm_initialize_isa (target_isa, arch->common.isa_bits);
34174
34175 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34176 {
34177 if (ext1->alias)
34178 continue;
34179
34180 arm_initialize_isa (isa1, ext1->isa_bits);
34181 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34182 {
34183 if (ext2->alias || ext1->remove != ext2->remove)
34184 continue;
34185
34186 arm_initialize_isa (isa2, ext2->isa_bits);
34187 /* If the option is a subset of the parent option, it doesn't
34188 add anything and so isn't useful. */
34189 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34190
34191 /* If the extension specifies any architectural bits then
34192 disallow it. Extensions should only specify feature bits. */
34193 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34194 }
34195 }
34196 }
34197
34198 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34199 {
34200 const cpu_arch_extension *ext1, *ext2;
34201
34202 if (cpu->common.extensions == NULL)
34203 continue;
34204
34205 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34206
34207 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34208 {
34209 if (ext1->alias)
34210 continue;
34211
34212 arm_initialize_isa (isa1, ext1->isa_bits);
34213 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34214 {
34215 if (ext2->alias || ext1->remove != ext2->remove)
34216 continue;
34217
34218 arm_initialize_isa (isa2, ext2->isa_bits);
34219 /* If the option is a subset of the parent option, it doesn't
34220 add anything and so isn't useful. */
34221 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34222
34223 /* If the extension specifies any architectural bits then
34224 disallow it. Extensions should only specify feature bits. */
34225 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34226 }
34227 }
34228 }
34229 }
34230
34231 /* Scan the static data tables generated by parsecpu.awk looking for
34232 potential issues with the data. Here we check for consistency of the
34233 FPU bits; in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34234 a feature bit that is not defined by any FPU. */
34235 static void
34236 arm_test_fpu_data (void)
34237 {
34238 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34239 auto_sbitmap fpubits (isa_num_bits);
34240 auto_sbitmap tmpset (isa_num_bits);
34241
34242 static const enum isa_feature fpu_bitlist_internal[]
34243 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34244 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34245
34246 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34247 {
34248 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34249 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34250 bitmap_clear (isa_all_fpubits_internal);
34251 bitmap_copy (isa_all_fpubits_internal, tmpset);
34252 }
34253
34254 if (!bitmap_empty_p (isa_all_fpubits_internal))
34255 {
34256 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34257 " group that are not defined by any FPU.\n"
34258 " Check your arm-cpus.in.\n");
34259 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34260 }
34261 }
34262
34263 static void
34264 arm_run_selftests (void)
34265 {
34266 arm_test_cpu_arch_data ();
34267 arm_test_fpu_data ();
34268 }
34269 } /* Namespace selftest. */
34270
34271 #undef TARGET_RUN_TARGET_SELFTESTS
34272 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34273 #endif /* CHECKING_P */
34274
34275 /* Implement TARGET_STACK_PROTECT_GUARD. If the guard is a
34276 global variable, use the default implementation; otherwise
34277 return NULL_TREE. */
34278 static tree
34279 arm_stack_protect_guard (void)
34280 {
34281 if (arm_stack_protector_guard == SSP_GLOBAL)
34282 return default_stack_protect_guard ();
34283
34284 return NULL_TREE;
34285 }
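
/* So with the default global guard the generic __stack_chk_guard symbol is
   used, whereas with a TLS-register based guard (e.g. selected with
   -mstack-protector-guard=tls, where supported) the hook returns NULL_TREE
   and the guard is handled directly by the target's stack_protect_*
   patterns. */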
34286
34287 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34288 Unlike the arm version, we do NOT implement asm flag outputs. */
34289
34290 rtx_insn *
34291 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34292 vec<machine_mode> & /*input_modes*/,
34293 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
34294 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34295 {
34296 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34297 if (startswith (constraints[i], "=@cc"))
34298 {
34299 sorry ("%<asm%> flags not supported in thumb1 mode");
34300 break;
34301 }
34302 return NULL;
34303 }
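
/* For example, an asm statement using a flag-output constraint such as
   "=@cceq" or "=@cccs" is accepted when compiling for ARM or Thumb-2 but
   reaches the sorry () above when compiling for Thumb-1. */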
34304
34305 /* Generate code to enable conditional branches in functions over 1 MiB.
34306 Parameters are:
34307 operands: the operand list of the asm insn (see arm_cond_branch or
34308 arm_cond_branch_reversed).
34309 pos_label: an index into the operands array; operands[pos_label] is
34310 the asm label of the final jump destination.
34311 dest: a string used to generate the asm label of the intermediate
34312 destination.
34313 branch_format: a string denoting the intermediate branch format, e.g.
34314 "beq", "bne", etc. */
34315
34316 const char *
34317 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34318 const char * branch_format)
34319 {
34320 rtx_code_label * tmp_label = gen_label_rtx ();
34321 char label_buf[256];
34322 char buffer[128];
34323 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34324 CODE_LABEL_NUMBER (tmp_label));
34325 const char *label_ptr = arm_strip_name_encoding (label_buf);
34326 rtx dest_label = operands[pos_label];
34327 operands[pos_label] = tmp_label;
34328
34329 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34330 output_asm_insn (buffer, operands);
34331
34332 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34333 operands[pos_label] = dest_label;
34334 output_asm_insn (buffer, operands);
34335 return "";
34336 }
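
/* The sequence emitted here looks roughly like

	<branch_format> .Lbcond<N>
	b	<final destination>
   .Lbcond<N>:

   i.e. the short conditional branch merely skips over the unconditional
   branch, which has enough range to reach the distant label; callers are
   therefore expected to pass the inverse of the condition they actually
   want to branch on. */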
34337
34338 /* If the given mode matches, loads from memory must use a base register
34339 in LO_REGS (i.e. [Rn], Rn <= LO_REGS). */
34340 enum reg_class
34341 arm_mode_base_reg_class (machine_mode mode)
34342 {
34343 if (TARGET_HAVE_MVE
34344 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34345 return LO_REGS;
34346
34347 return MODE_BASE_REG_REG_CLASS (mode);
34348 }
34349
34350 struct gcc_target targetm = TARGET_INITIALIZER;
34351
34352 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34353
34354 opt_machine_mode
34355 arm_get_mask_mode (machine_mode mode)
34356 {
34357 if (TARGET_HAVE_MVE)
34358 return arm_mode_to_pred_mode (mode);
34359
34360 return default_get_mask_mode (mode);
34361 }
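
/* For instance, with MVE enabled the mask mode for V4SImode is the
   V4BImode predicate mode, whereas without MVE the default choice (an
   integer vector mode of the same shape) is used. */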
34362
34363 #include "gt-arm.h"