1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t,
191 const function_arg_info &);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 const function_arg_info &);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
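/* Illustrative sketch, not part of the compiler proper: how user source
   might spell the attributes accepted by the table above.  The function
   names below are hypothetical and only demonstrate the syntax.

     void far_away_fn (void) __attribute__ ((long_call));
     void __attribute__ ((isr ("IRQ"))) my_irq_handler (void);
     double __attribute__ ((pcs ("aapcs-vfp"))) vfp_variant_fn (double);
     void __attribute__ ((cmse_nonsecure_entry)) secure_gateway_fn (void);  */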
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
899 int arm_arch8_3 = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
902 int arm_arch8_4 = 0;
903
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905 Architecture 8.2. */
906 int arm_fp16_inst = 0;
907
908 /* Nonzero if this chip can benefit from load scheduling. */
909 int arm_ld_sched = 0;
910
911 /* Nonzero if this chip is a StrongARM. */
912 int arm_tune_strongarm = 0;
913
914 /* Nonzero if this chip supports Intel Wireless MMX technology. */
915 int arm_arch_iwmmxt = 0;
916
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
918 int arm_arch_iwmmxt2 = 0;
919
920 /* Nonzero if this chip is an XScale. */
921 int arm_arch_xscale = 0;
922
923 /* Nonzero if tuning for XScale */
924 int arm_tune_xscale = 0;
925
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927 This typically means an ARM6 or ARM7 with MMU or MPU. */
928 int arm_tune_wbuf = 0;
929
930 /* Nonzero if tuning for Cortex-A9. */
931 int arm_tune_cortex_a9 = 0;
932
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934 preprocessor.
935 XXX This is a bit of a hack, it's intended to help work around
936 problems in GLD which doesn't understand that armv5t code is
937 interworking clean. */
938 int arm_cpp_interwork = 0;
939
940 /* Nonzero if chip supports Thumb 1. */
941 int arm_arch_thumb1;
942
943 /* Nonzero if chip supports Thumb 2. */
944 int arm_arch_thumb2;
945
946 /* Nonzero if chip supports integer division instruction. */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
949
950 /* Nonzero if chip disallows volatile memory access in IT block. */
951 int arm_arch_no_volatile_ce;
952
953 /* Nonzero if we should use Neon to handle 64-bit operations rather
954 than core registers. */
955 int prefer_neon_for_64bits = 0;
956
957 /* Nonzero if we shouldn't use literal pools. */
958 bool arm_disable_literal_pool = false;
959
960 /* The register number to be used for the PIC offset register. */
961 unsigned arm_pic_register = INVALID_REGNUM;
962
963 enum arm_pcs arm_pcs_default;
964
965 /* For an explanation of these variables, see final_prescan_insn below. */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
968 enum arm_cond_code arm_current_cc;
969
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn. */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975 Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask. */
978 int arm_condexec_masklen = 0;
979
980 /* Nonzero if chip supports the ARMv8 CRC instructions. */
981 int arm_arch_crc = 0;
982
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
984 int arm_arch_dotprod = 0;
985
986 /* Nonzero if chip supports the ARMv8-M security extensions. */
987 int arm_arch_cmse = 0;
988
989 /* Nonzero if the core has a very small, high-latency, multiply unit. */
990 int arm_m_profile_small_mul = 0;
991
992 /* The condition codes of the ARM, and the inverse function. */
993 static const char * const arm_condition_codes[] =
994 {
995 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
997 };
998
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1000 int arm_regs_in_sequence[] =
1001 {
1002 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1003 };
1004
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1007
1008 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1011 \f
1012 /* Initialization code. */
1013
1014 struct cpu_tune
1015 {
1016 enum processor_type scheduler;
1017 unsigned int tune_flags;
1018 const struct tune_params *tune;
1019 };
1020
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1023 { \
1024 num_slots, \
1025 l1_size, \
1026 l1_line_size \
1027 }
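/* For illustration only (the operand values here are hypothetical):
   ARM_PREFETCH_BENEFICIAL (4, 32, 64) expands to the initializer
   { 4, 32, 64 }, while ARM_PREFETCH_NOT_BENEFICIAL expands to
   { 0, -1, -1 }, i.e. no prefetch slots and the L1 cache parameters
   left unspecified.  */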
1028
1029 /* arm generic vectorizer costs. */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar_load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 1, /* vec_unalign_load_cost. */
1040 1, /* vec_unalign_store_cost. */
1041 1, /* vec_store_cost. */
1042 3, /* cond_taken_branch_cost. */
1043 1, /* cond_not_taken_branch_cost. */
1044 };
1045
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1047 #include "aarch-cost-tables.h"
1048
1049
1050
1051 const struct cpu_cost_table cortexa9_extra_costs =
1052 {
1053 /* ALU */
1054 {
1055 0, /* arith. */
1056 0, /* logical. */
1057 0, /* shift. */
1058 COSTS_N_INSNS (1), /* shift_reg. */
1059 COSTS_N_INSNS (1), /* arith_shift. */
1060 COSTS_N_INSNS (2), /* arith_shift_reg. */
1061 0, /* log_shift. */
1062 COSTS_N_INSNS (1), /* log_shift_reg. */
1063 COSTS_N_INSNS (1), /* extend. */
1064 COSTS_N_INSNS (2), /* extend_arith. */
1065 COSTS_N_INSNS (1), /* bfi. */
1066 COSTS_N_INSNS (1), /* bfx. */
1067 0, /* clz. */
1068 0, /* rev. */
1069 0, /* non_exec. */
1070 true /* non_exec_costs_exec. */
1071 },
1072 {
1073 /* MULT SImode */
1074 {
1075 COSTS_N_INSNS (3), /* simple. */
1076 COSTS_N_INSNS (3), /* flag_setting. */
1077 COSTS_N_INSNS (2), /* extend. */
1078 COSTS_N_INSNS (3), /* add. */
1079 COSTS_N_INSNS (2), /* extend_add. */
1080 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1081 },
1082 /* MULT DImode */
1083 {
1084 0, /* simple (N/A). */
1085 0, /* flag_setting (N/A). */
1086 COSTS_N_INSNS (4), /* extend. */
1087 0, /* add (N/A). */
1088 COSTS_N_INSNS (4), /* extend_add. */
1089 0 /* idiv (N/A). */
1090 }
1091 },
1092 /* LD/ST */
1093 {
1094 COSTS_N_INSNS (2), /* load. */
1095 COSTS_N_INSNS (2), /* load_sign_extend. */
1096 COSTS_N_INSNS (2), /* ldrd. */
1097 COSTS_N_INSNS (2), /* ldm_1st. */
1098 1, /* ldm_regs_per_insn_1st. */
1099 2, /* ldm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (5), /* loadf. */
1101 COSTS_N_INSNS (5), /* loadd. */
1102 COSTS_N_INSNS (1), /* load_unaligned. */
1103 COSTS_N_INSNS (2), /* store. */
1104 COSTS_N_INSNS (2), /* strd. */
1105 COSTS_N_INSNS (2), /* stm_1st. */
1106 1, /* stm_regs_per_insn_1st. */
1107 2, /* stm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* storef. */
1109 COSTS_N_INSNS (1), /* stored. */
1110 COSTS_N_INSNS (1), /* store_unaligned. */
1111 COSTS_N_INSNS (1), /* loadv. */
1112 COSTS_N_INSNS (1) /* storev. */
1113 },
1114 {
1115 /* FP SFmode */
1116 {
1117 COSTS_N_INSNS (14), /* div. */
1118 COSTS_N_INSNS (4), /* mult. */
1119 COSTS_N_INSNS (7), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1130 },
1131 /* FP DFmode */
1132 {
1133 COSTS_N_INSNS (24), /* div. */
1134 COSTS_N_INSNS (5), /* mult. */
1135 COSTS_N_INSNS (8), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (1), /* fpconst. */
1139 COSTS_N_INSNS (1), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1146 }
1147 },
1148 /* Vector */
1149 {
1150 COSTS_N_INSNS (1) /* alu. */
1151 }
1152 };
1153
1154 const struct cpu_cost_table cortexa8_extra_costs =
1155 {
1156 /* ALU */
1157 {
1158 0, /* arith. */
1159 0, /* logical. */
1160 COSTS_N_INSNS (1), /* shift. */
1161 0, /* shift_reg. */
1162 COSTS_N_INSNS (1), /* arith_shift. */
1163 0, /* arith_shift_reg. */
1164 COSTS_N_INSNS (1), /* log_shift. */
1165 0, /* log_shift_reg. */
1166 0, /* extend. */
1167 0, /* extend_arith. */
1168 0, /* bfi. */
1169 0, /* bfx. */
1170 0, /* clz. */
1171 0, /* rev. */
1172 0, /* non_exec. */
1173 true /* non_exec_costs_exec. */
1174 },
1175 {
1176 /* MULT SImode */
1177 {
1178 COSTS_N_INSNS (1), /* simple. */
1179 COSTS_N_INSNS (1), /* flag_setting. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* add. */
1182 COSTS_N_INSNS (1), /* extend_add. */
1183 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1184 },
1185 /* MULT DImode */
1186 {
1187 0, /* simple (N/A). */
1188 0, /* flag_setting (N/A). */
1189 COSTS_N_INSNS (2), /* extend. */
1190 0, /* add (N/A). */
1191 COSTS_N_INSNS (2), /* extend_add. */
1192 0 /* idiv (N/A). */
1193 }
1194 },
1195 /* LD/ST */
1196 {
1197 COSTS_N_INSNS (1), /* load. */
1198 COSTS_N_INSNS (1), /* load_sign_extend. */
1199 COSTS_N_INSNS (1), /* ldrd. */
1200 COSTS_N_INSNS (1), /* ldm_1st. */
1201 1, /* ldm_regs_per_insn_1st. */
1202 2, /* ldm_regs_per_insn_subsequent. */
1203 COSTS_N_INSNS (1), /* loadf. */
1204 COSTS_N_INSNS (1), /* loadd. */
1205 COSTS_N_INSNS (1), /* load_unaligned. */
1206 COSTS_N_INSNS (1), /* store. */
1207 COSTS_N_INSNS (1), /* strd. */
1208 COSTS_N_INSNS (1), /* stm_1st. */
1209 1, /* stm_regs_per_insn_1st. */
1210 2, /* stm_regs_per_insn_subsequent. */
1211 COSTS_N_INSNS (1), /* storef. */
1212 COSTS_N_INSNS (1), /* stored. */
1213 COSTS_N_INSNS (1), /* store_unaligned. */
1214 COSTS_N_INSNS (1), /* loadv. */
1215 COSTS_N_INSNS (1) /* storev. */
1216 },
1217 {
1218 /* FP SFmode */
1219 {
1220 COSTS_N_INSNS (36), /* div. */
1221 COSTS_N_INSNS (11), /* mult. */
1222 COSTS_N_INSNS (20), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (4), /* widen. */
1229 COSTS_N_INSNS (4), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1233 },
1234 /* FP DFmode */
1235 {
1236 COSTS_N_INSNS (64), /* div. */
1237 COSTS_N_INSNS (16), /* mult. */
1238 COSTS_N_INSNS (25), /* mult_addsub. */
1239 COSTS_N_INSNS (30), /* fma. */
1240 COSTS_N_INSNS (9), /* addsub. */
1241 COSTS_N_INSNS (3), /* fpconst. */
1242 COSTS_N_INSNS (3), /* neg. */
1243 COSTS_N_INSNS (6), /* compare. */
1244 COSTS_N_INSNS (6), /* widen. */
1245 COSTS_N_INSNS (6), /* narrow. */
1246 COSTS_N_INSNS (8), /* toint. */
1247 COSTS_N_INSNS (8), /* fromint. */
1248 COSTS_N_INSNS (8) /* roundint. */
1249 }
1250 },
1251 /* Vector */
1252 {
1253 COSTS_N_INSNS (1) /* alu. */
1254 }
1255 };
1256
1257 const struct cpu_cost_table cortexa5_extra_costs =
1258 {
1259 /* ALU */
1260 {
1261 0, /* arith. */
1262 0, /* logical. */
1263 COSTS_N_INSNS (1), /* shift. */
1264 COSTS_N_INSNS (1), /* shift_reg. */
1265 COSTS_N_INSNS (1), /* arith_shift. */
1266 COSTS_N_INSNS (1), /* arith_shift_reg. */
1267 COSTS_N_INSNS (1), /* log_shift. */
1268 COSTS_N_INSNS (1), /* log_shift_reg. */
1269 COSTS_N_INSNS (1), /* extend. */
1270 COSTS_N_INSNS (1), /* extend_arith. */
1271 COSTS_N_INSNS (1), /* bfi. */
1272 COSTS_N_INSNS (1), /* bfx. */
1273 COSTS_N_INSNS (1), /* clz. */
1274 COSTS_N_INSNS (1), /* rev. */
1275 0, /* non_exec. */
1276 true /* non_exec_costs_exec. */
1277 },
1278
1279 {
1280 /* MULT SImode */
1281 {
1282 0, /* simple. */
1283 COSTS_N_INSNS (1), /* flag_setting. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* add. */
1286 COSTS_N_INSNS (1), /* extend_add. */
1287 COSTS_N_INSNS (7) /* idiv. */
1288 },
1289 /* MULT DImode */
1290 {
1291 0, /* simple (N/A). */
1292 0, /* flag_setting (N/A). */
1293 COSTS_N_INSNS (1), /* extend. */
1294 0, /* add. */
1295 COSTS_N_INSNS (2), /* extend_add. */
1296 0 /* idiv (N/A). */
1297 }
1298 },
1299 /* LD/ST */
1300 {
1301 COSTS_N_INSNS (1), /* load. */
1302 COSTS_N_INSNS (1), /* load_sign_extend. */
1303 COSTS_N_INSNS (6), /* ldrd. */
1304 COSTS_N_INSNS (1), /* ldm_1st. */
1305 1, /* ldm_regs_per_insn_1st. */
1306 2, /* ldm_regs_per_insn_subsequent. */
1307 COSTS_N_INSNS (2), /* loadf. */
1308 COSTS_N_INSNS (4), /* loadd. */
1309 COSTS_N_INSNS (1), /* load_unaligned. */
1310 COSTS_N_INSNS (1), /* store. */
1311 COSTS_N_INSNS (3), /* strd. */
1312 COSTS_N_INSNS (1), /* stm_1st. */
1313 1, /* stm_regs_per_insn_1st. */
1314 2, /* stm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (2), /* storef. */
1316 COSTS_N_INSNS (2), /* stored. */
1317 COSTS_N_INSNS (1), /* store_unaligned. */
1318 COSTS_N_INSNS (1), /* loadv. */
1319 COSTS_N_INSNS (1) /* storev. */
1320 },
1321 {
1322 /* FP SFmode */
1323 {
1324 COSTS_N_INSNS (15), /* div. */
1325 COSTS_N_INSNS (3), /* mult. */
1326 COSTS_N_INSNS (7), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1337 },
1338 /* FP DFmode */
1339 {
1340 COSTS_N_INSNS (30), /* div. */
1341 COSTS_N_INSNS (6), /* mult. */
1342 COSTS_N_INSNS (10), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1353 }
1354 },
1355 /* Vector */
1356 {
1357 COSTS_N_INSNS (1) /* alu. */
1358 }
1359 };
1360
1361
1362 const struct cpu_cost_table cortexa7_extra_costs =
1363 {
1364 /* ALU */
1365 {
1366 0, /* arith. */
1367 0, /* logical. */
1368 COSTS_N_INSNS (1), /* shift. */
1369 COSTS_N_INSNS (1), /* shift_reg. */
1370 COSTS_N_INSNS (1), /* arith_shift. */
1371 COSTS_N_INSNS (1), /* arith_shift_reg. */
1372 COSTS_N_INSNS (1), /* log_shift. */
1373 COSTS_N_INSNS (1), /* log_shift_reg. */
1374 COSTS_N_INSNS (1), /* extend. */
1375 COSTS_N_INSNS (1), /* extend_arith. */
1376 COSTS_N_INSNS (1), /* bfi. */
1377 COSTS_N_INSNS (1), /* bfx. */
1378 COSTS_N_INSNS (1), /* clz. */
1379 COSTS_N_INSNS (1), /* rev. */
1380 0, /* non_exec. */
1381 true /* non_exec_costs_exec. */
1382 },
1383
1384 {
1385 /* MULT SImode */
1386 {
1387 0, /* simple. */
1388 COSTS_N_INSNS (1), /* flag_setting. */
1389 COSTS_N_INSNS (1), /* extend. */
1390 COSTS_N_INSNS (1), /* add. */
1391 COSTS_N_INSNS (1), /* extend_add. */
1392 COSTS_N_INSNS (7) /* idiv. */
1393 },
1394 /* MULT DImode */
1395 {
1396 0, /* simple (N/A). */
1397 0, /* flag_setting (N/A). */
1398 COSTS_N_INSNS (1), /* extend. */
1399 0, /* add. */
1400 COSTS_N_INSNS (2), /* extend_add. */
1401 0 /* idiv (N/A). */
1402 }
1403 },
1404 /* LD/ST */
1405 {
1406 COSTS_N_INSNS (1), /* load. */
1407 COSTS_N_INSNS (1), /* load_sign_extend. */
1408 COSTS_N_INSNS (3), /* ldrd. */
1409 COSTS_N_INSNS (1), /* ldm_1st. */
1410 1, /* ldm_regs_per_insn_1st. */
1411 2, /* ldm_regs_per_insn_subsequent. */
1412 COSTS_N_INSNS (2), /* loadf. */
1413 COSTS_N_INSNS (2), /* loadd. */
1414 COSTS_N_INSNS (1), /* load_unaligned. */
1415 COSTS_N_INSNS (1), /* store. */
1416 COSTS_N_INSNS (3), /* strd. */
1417 COSTS_N_INSNS (1), /* stm_1st. */
1418 1, /* stm_regs_per_insn_1st. */
1419 2, /* stm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (2), /* storef. */
1421 COSTS_N_INSNS (2), /* stored. */
1422 COSTS_N_INSNS (1), /* store_unaligned. */
1423 COSTS_N_INSNS (1), /* loadv. */
1424 COSTS_N_INSNS (1) /* storev. */
1425 },
1426 {
1427 /* FP SFmode */
1428 {
1429 COSTS_N_INSNS (15), /* div. */
1430 COSTS_N_INSNS (3), /* mult. */
1431 COSTS_N_INSNS (7), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1442 },
1443 /* FP DFmode */
1444 {
1445 COSTS_N_INSNS (30), /* div. */
1446 COSTS_N_INSNS (6), /* mult. */
1447 COSTS_N_INSNS (10), /* mult_addsub. */
1448 COSTS_N_INSNS (7), /* fma. */
1449 COSTS_N_INSNS (3), /* addsub. */
1450 COSTS_N_INSNS (3), /* fpconst. */
1451 COSTS_N_INSNS (3), /* neg. */
1452 COSTS_N_INSNS (3), /* compare. */
1453 COSTS_N_INSNS (3), /* widen. */
1454 COSTS_N_INSNS (3), /* narrow. */
1455 COSTS_N_INSNS (3), /* toint. */
1456 COSTS_N_INSNS (3), /* fromint. */
1457 COSTS_N_INSNS (3) /* roundint. */
1458 }
1459 },
1460 /* Vector */
1461 {
1462 COSTS_N_INSNS (1) /* alu. */
1463 }
1464 };
1465
1466 const struct cpu_cost_table cortexa12_extra_costs =
1467 {
1468 /* ALU */
1469 {
1470 0, /* arith. */
1471 0, /* logical. */
1472 0, /* shift. */
1473 COSTS_N_INSNS (1), /* shift_reg. */
1474 COSTS_N_INSNS (1), /* arith_shift. */
1475 COSTS_N_INSNS (1), /* arith_shift_reg. */
1476 COSTS_N_INSNS (1), /* log_shift. */
1477 COSTS_N_INSNS (1), /* log_shift_reg. */
1478 0, /* extend. */
1479 COSTS_N_INSNS (1), /* extend_arith. */
1480 0, /* bfi. */
1481 COSTS_N_INSNS (1), /* bfx. */
1482 COSTS_N_INSNS (1), /* clz. */
1483 COSTS_N_INSNS (1), /* rev. */
1484 0, /* non_exec. */
1485 true /* non_exec_costs_exec. */
1486 },
1487 /* MULT SImode */
1488 {
1489 {
1490 COSTS_N_INSNS (2), /* simple. */
1491 COSTS_N_INSNS (3), /* flag_setting. */
1492 COSTS_N_INSNS (2), /* extend. */
1493 COSTS_N_INSNS (3), /* add. */
1494 COSTS_N_INSNS (2), /* extend_add. */
1495 COSTS_N_INSNS (18) /* idiv. */
1496 },
1497 /* MULT DImode */
1498 {
1499 0, /* simple (N/A). */
1500 0, /* flag_setting (N/A). */
1501 COSTS_N_INSNS (3), /* extend. */
1502 0, /* add (N/A). */
1503 COSTS_N_INSNS (3), /* extend_add. */
1504 0 /* idiv (N/A). */
1505 }
1506 },
1507 /* LD/ST */
1508 {
1509 COSTS_N_INSNS (3), /* load. */
1510 COSTS_N_INSNS (3), /* load_sign_extend. */
1511 COSTS_N_INSNS (3), /* ldrd. */
1512 COSTS_N_INSNS (3), /* ldm_1st. */
1513 1, /* ldm_regs_per_insn_1st. */
1514 2, /* ldm_regs_per_insn_subsequent. */
1515 COSTS_N_INSNS (3), /* loadf. */
1516 COSTS_N_INSNS (3), /* loadd. */
1517 0, /* load_unaligned. */
1518 0, /* store. */
1519 0, /* strd. */
1520 0, /* stm_1st. */
1521 1, /* stm_regs_per_insn_1st. */
1522 2, /* stm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (2), /* storef. */
1524 COSTS_N_INSNS (2), /* stored. */
1525 0, /* store_unaligned. */
1526 COSTS_N_INSNS (1), /* loadv. */
1527 COSTS_N_INSNS (1) /* storev. */
1528 },
1529 {
1530 /* FP SFmode */
1531 {
1532 COSTS_N_INSNS (17), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1545 },
1546 /* FP DFmode */
1547 {
1548 COSTS_N_INSNS (31), /* div. */
1549 COSTS_N_INSNS (4), /* mult. */
1550 COSTS_N_INSNS (8), /* mult_addsub. */
1551 COSTS_N_INSNS (8), /* fma. */
1552 COSTS_N_INSNS (4), /* addsub. */
1553 COSTS_N_INSNS (2), /* fpconst. */
1554 COSTS_N_INSNS (2), /* neg. */
1555 COSTS_N_INSNS (2), /* compare. */
1556 COSTS_N_INSNS (4), /* widen. */
1557 COSTS_N_INSNS (4), /* narrow. */
1558 COSTS_N_INSNS (4), /* toint. */
1559 COSTS_N_INSNS (4), /* fromint. */
1560 COSTS_N_INSNS (4) /* roundint. */
1561 }
1562 },
1563 /* Vector */
1564 {
1565 COSTS_N_INSNS (1) /* alu. */
1566 }
1567 };
1568
1569 const struct cpu_cost_table cortexa15_extra_costs =
1570 {
1571 /* ALU */
1572 {
1573 0, /* arith. */
1574 0, /* logical. */
1575 0, /* shift. */
1576 0, /* shift_reg. */
1577 COSTS_N_INSNS (1), /* arith_shift. */
1578 COSTS_N_INSNS (1), /* arith_shift_reg. */
1579 COSTS_N_INSNS (1), /* log_shift. */
1580 COSTS_N_INSNS (1), /* log_shift_reg. */
1581 0, /* extend. */
1582 COSTS_N_INSNS (1), /* extend_arith. */
1583 COSTS_N_INSNS (1), /* bfi. */
1584 0, /* bfx. */
1585 0, /* clz. */
1586 0, /* rev. */
1587 0, /* non_exec. */
1588 true /* non_exec_costs_exec. */
1589 },
1590 /* MULT SImode */
1591 {
1592 {
1593 COSTS_N_INSNS (2), /* simple. */
1594 COSTS_N_INSNS (3), /* flag_setting. */
1595 COSTS_N_INSNS (2), /* extend. */
1596 COSTS_N_INSNS (2), /* add. */
1597 COSTS_N_INSNS (2), /* extend_add. */
1598 COSTS_N_INSNS (18) /* idiv. */
1599 },
1600 /* MULT DImode */
1601 {
1602 0, /* simple (N/A). */
1603 0, /* flag_setting (N/A). */
1604 COSTS_N_INSNS (3), /* extend. */
1605 0, /* add (N/A). */
1606 COSTS_N_INSNS (3), /* extend_add. */
1607 0 /* idiv (N/A). */
1608 }
1609 },
1610 /* LD/ST */
1611 {
1612 COSTS_N_INSNS (3), /* load. */
1613 COSTS_N_INSNS (3), /* load_sign_extend. */
1614 COSTS_N_INSNS (3), /* ldrd. */
1615 COSTS_N_INSNS (4), /* ldm_1st. */
1616 1, /* ldm_regs_per_insn_1st. */
1617 2, /* ldm_regs_per_insn_subsequent. */
1618 COSTS_N_INSNS (4), /* loadf. */
1619 COSTS_N_INSNS (4), /* loadd. */
1620 0, /* load_unaligned. */
1621 0, /* store. */
1622 0, /* strd. */
1623 COSTS_N_INSNS (1), /* stm_1st. */
1624 1, /* stm_regs_per_insn_1st. */
1625 2, /* stm_regs_per_insn_subsequent. */
1626 0, /* storef. */
1627 0, /* stored. */
1628 0, /* store_unaligned. */
1629 COSTS_N_INSNS (1), /* loadv. */
1630 COSTS_N_INSNS (1) /* storev. */
1631 },
1632 {
1633 /* FP SFmode */
1634 {
1635 COSTS_N_INSNS (17), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (5), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1648 },
1649 /* FP DFmode */
1650 {
1651 COSTS_N_INSNS (31), /* div. */
1652 COSTS_N_INSNS (4), /* mult. */
1653 COSTS_N_INSNS (8), /* mult_addsub. */
1654 COSTS_N_INSNS (8), /* fma. */
1655 COSTS_N_INSNS (4), /* addsub. */
1656 COSTS_N_INSNS (2), /* fpconst. */
1657 COSTS_N_INSNS (2), /* neg. */
1658 COSTS_N_INSNS (2), /* compare. */
1659 COSTS_N_INSNS (4), /* widen. */
1660 COSTS_N_INSNS (4), /* narrow. */
1661 COSTS_N_INSNS (4), /* toint. */
1662 COSTS_N_INSNS (4), /* fromint. */
1663 COSTS_N_INSNS (4) /* roundint. */
1664 }
1665 },
1666 /* Vector */
1667 {
1668 COSTS_N_INSNS (1) /* alu. */
1669 }
1670 };
1671
1672 const struct cpu_cost_table v7m_extra_costs =
1673 {
1674 /* ALU */
1675 {
1676 0, /* arith. */
1677 0, /* logical. */
1678 0, /* shift. */
1679 0, /* shift_reg. */
1680 0, /* arith_shift. */
1681 COSTS_N_INSNS (1), /* arith_shift_reg. */
1682 0, /* log_shift. */
1683 COSTS_N_INSNS (1), /* log_shift_reg. */
1684 0, /* extend. */
1685 COSTS_N_INSNS (1), /* extend_arith. */
1686 0, /* bfi. */
1687 0, /* bfx. */
1688 0, /* clz. */
1689 0, /* rev. */
1690 COSTS_N_INSNS (1), /* non_exec. */
1691 false /* non_exec_costs_exec. */
1692 },
1693 {
1694 /* MULT SImode */
1695 {
1696 COSTS_N_INSNS (1), /* simple. */
1697 COSTS_N_INSNS (1), /* flag_setting. */
1698 COSTS_N_INSNS (2), /* extend. */
1699 COSTS_N_INSNS (1), /* add. */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 COSTS_N_INSNS (8) /* idiv. */
1702 },
1703 /* MULT DImode */
1704 {
1705 0, /* simple (N/A). */
1706 0, /* flag_setting (N/A). */
1707 COSTS_N_INSNS (2), /* extend. */
1708 0, /* add (N/A). */
1709 COSTS_N_INSNS (3), /* extend_add. */
1710 0 /* idiv (N/A). */
1711 }
1712 },
1713 /* LD/ST */
1714 {
1715 COSTS_N_INSNS (2), /* load. */
1716 0, /* load_sign_extend. */
1717 COSTS_N_INSNS (3), /* ldrd. */
1718 COSTS_N_INSNS (2), /* ldm_1st. */
1719 1, /* ldm_regs_per_insn_1st. */
1720 1, /* ldm_regs_per_insn_subsequent. */
1721 COSTS_N_INSNS (2), /* loadf. */
1722 COSTS_N_INSNS (3), /* loadd. */
1723 COSTS_N_INSNS (1), /* load_unaligned. */
1724 COSTS_N_INSNS (2), /* store. */
1725 COSTS_N_INSNS (3), /* strd. */
1726 COSTS_N_INSNS (2), /* stm_1st. */
1727 1, /* stm_regs_per_insn_1st. */
1728 1, /* stm_regs_per_insn_subsequent. */
1729 COSTS_N_INSNS (2), /* storef. */
1730 COSTS_N_INSNS (3), /* stored. */
1731 COSTS_N_INSNS (1), /* store_unaligned. */
1732 COSTS_N_INSNS (1), /* loadv. */
1733 COSTS_N_INSNS (1) /* storev. */
1734 },
1735 {
1736 /* FP SFmode */
1737 {
1738 COSTS_N_INSNS (7), /* div. */
1739 COSTS_N_INSNS (2), /* mult. */
1740 COSTS_N_INSNS (5), /* mult_addsub. */
1741 COSTS_N_INSNS (3), /* fma. */
1742 COSTS_N_INSNS (1), /* addsub. */
1743 0, /* fpconst. */
1744 0, /* neg. */
1745 0, /* compare. */
1746 0, /* widen. */
1747 0, /* narrow. */
1748 0, /* toint. */
1749 0, /* fromint. */
1750 0 /* roundint. */
1751 },
1752 /* FP DFmode */
1753 {
1754 COSTS_N_INSNS (15), /* div. */
1755 COSTS_N_INSNS (5), /* mult. */
1756 COSTS_N_INSNS (7), /* mult_addsub. */
1757 COSTS_N_INSNS (7), /* fma. */
1758 COSTS_N_INSNS (3), /* addsub. */
1759 0, /* fpconst. */
1760 0, /* neg. */
1761 0, /* compare. */
1762 0, /* widen. */
1763 0, /* narrow. */
1764 0, /* toint. */
1765 0, /* fromint. */
1766 0 /* roundint. */
1767 }
1768 },
1769 /* Vector */
1770 {
1771 COSTS_N_INSNS (1) /* alu. */
1772 }
1773 };
1774
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1776 {
1777 /* int. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* float. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 },
1789 /* vector. */
1790 {
1791 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1792 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1793 COSTS_N_INSNS (0) /* AMO_WB. */
1794 }
1795 };
1796
1797 const struct tune_params arm_slowmul_tune =
1798 {
1799 &generic_extra_costs, /* Insn extra costs. */
1800 &generic_addr_mode_costs, /* Addressing mode costs. */
1801 NULL, /* Sched adj cost. */
1802 arm_default_branch_cost,
1803 &arm_default_vec_cost,
1804 3, /* Constant limit. */
1805 5, /* Max cond insns. */
1806 8, /* Memset max inline. */
1807 1, /* Issue rate. */
1808 ARM_PREFETCH_NOT_BENEFICIAL,
1809 tune_params::PREF_CONST_POOL_TRUE,
1810 tune_params::PREF_LDRD_FALSE,
1811 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1813 tune_params::DISPARAGE_FLAGS_NEITHER,
1814 tune_params::PREF_NEON_64_FALSE,
1815 tune_params::PREF_NEON_STRINGOPS_FALSE,
1816 tune_params::FUSE_NOTHING,
1817 tune_params::SCHED_AUTOPREF_OFF
1818 };
1819
1820 const struct tune_params arm_fastmul_tune =
1821 {
1822 &generic_extra_costs, /* Insn extra costs. */
1823 &generic_addr_mode_costs, /* Addressing mode costs. */
1824 NULL, /* Sched adj cost. */
1825 arm_default_branch_cost,
1826 &arm_default_vec_cost,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 8, /* Memset max inline. */
1830 1, /* Issue rate. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 tune_params::PREF_CONST_POOL_TRUE,
1833 tune_params::PREF_LDRD_FALSE,
1834 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1836 tune_params::DISPARAGE_FLAGS_NEITHER,
1837 tune_params::PREF_NEON_64_FALSE,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE,
1839 tune_params::FUSE_NOTHING,
1840 tune_params::SCHED_AUTOPREF_OFF
1841 };
1842
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1845
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848 &generic_extra_costs, /* Insn extra costs. */
1849 &generic_addr_mode_costs, /* Addressing mode costs. */
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_TRUE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_64_FALSE,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE,
1865 tune_params::FUSE_NOTHING,
1866 tune_params::SCHED_AUTOPREF_OFF
1867 };
1868
1869 const struct tune_params arm_xscale_tune =
1870 {
1871 &generic_extra_costs, /* Insn extra costs. */
1872 &generic_addr_mode_costs, /* Addressing mode costs. */
1873 xscale_sched_adjust_cost,
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 2, /* Constant limit. */
1877 3, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_TRUE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1890 };
1891
1892 const struct tune_params arm_9e_tune =
1893 {
1894 &generic_extra_costs, /* Insn extra costs. */
1895 &generic_addr_mode_costs, /* Addressing mode costs. */
1896 NULL, /* Sched adj cost. */
1897 arm_default_branch_cost,
1898 &arm_default_vec_cost,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 1, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL,
1904 tune_params::PREF_CONST_POOL_TRUE,
1905 tune_params::PREF_LDRD_FALSE,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER,
1909 tune_params::PREF_NEON_64_FALSE,
1910 tune_params::PREF_NEON_STRINGOPS_FALSE,
1911 tune_params::FUSE_NOTHING,
1912 tune_params::SCHED_AUTOPREF_OFF
1913 };
1914
1915 const struct tune_params arm_marvell_pj4_tune =
1916 {
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1936 };
1937
1938 const struct tune_params arm_v6t2_tune =
1939 {
1940 &generic_extra_costs, /* Insn extra costs. */
1941 &generic_addr_mode_costs, /* Addressing mode costs. */
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_FALSE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1959 };
1960
1961
1962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1963 const struct tune_params arm_cortex_tune =
1964 {
1965 &generic_extra_costs,
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_FALSE,
1982 tune_params::FUSE_NOTHING,
1983 tune_params::SCHED_AUTOPREF_OFF
1984 };
1985
1986 const struct tune_params arm_cortex_a8_tune =
1987 {
1988 &cortexa8_extra_costs,
1989 &generic_addr_mode_costs, /* Addressing mode costs. */
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 2, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 tune_params::FUSE_NOTHING,
2006 tune_params::SCHED_AUTOPREF_OFF
2007 };
2008
2009 const struct tune_params arm_cortex_a7_tune =
2010 {
2011 &cortexa7_extra_costs,
2012 &generic_addr_mode_costs, /* Addressing mode costs. */
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 2, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a15_tune =
2033 {
2034 &cortexa15_extra_costs,
2035 &generic_addr_mode_costs, /* Addressing mode costs. */
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 8, /* Memset max inline. */
2042 3, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_TRUE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_FULL
2053 };
2054
2055 const struct tune_params arm_cortex_a35_tune =
2056 {
2057 &cortexa53_extra_costs,
2058 &generic_addr_mode_costs, /* Addressing mode costs. */
2059 NULL, /* Sched adj cost. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost,
2062 1, /* Constant limit. */
2063 5, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 1, /* Issue rate. */
2066 ARM_PREFETCH_NOT_BENEFICIAL,
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_FALSE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_NEITHER,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a53_tune =
2079 {
2080 &cortexa53_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_cortex_a57_tune =
2102 {
2103 &cortexa57_extra_costs,
2104 &generic_addr_mode_costs, /* addressing mode costs */
2105 NULL, /* Sched adj cost. */
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 3, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_TRUE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_ALL,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_TRUE,
2120 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121 tune_params::SCHED_AUTOPREF_FULL
2122 };
2123
2124 const struct tune_params arm_exynosm1_tune =
2125 {
2126 &exynosm1_extra_costs,
2127 &generic_addr_mode_costs, /* Addressing mode costs. */
2128 NULL, /* Sched adj cost. */
2129 arm_default_branch_cost,
2130 &arm_default_vec_cost,
2131 1, /* Constant limit. */
2132 2, /* Max cond insns. */
2133 8, /* Memset max inline. */
2134 3, /* Issue rate. */
2135 ARM_PREFETCH_NOT_BENEFICIAL,
2136 tune_params::PREF_CONST_POOL_FALSE,
2137 tune_params::PREF_LDRD_TRUE,
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2140 tune_params::DISPARAGE_FLAGS_ALL,
2141 tune_params::PREF_NEON_64_FALSE,
2142 tune_params::PREF_NEON_STRINGOPS_TRUE,
2143 tune_params::FUSE_NOTHING,
2144 tune_params::SCHED_AUTOPREF_OFF
2145 };
2146
2147 const struct tune_params arm_xgene1_tune =
2148 {
2149 &xgene1_extra_costs,
2150 &generic_addr_mode_costs, /* Addressing mode costs. */
2151 NULL, /* Sched adj cost. */
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 32, /* Memset max inline. */
2157 4, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL,
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_TRUE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171 less appealing. Set max_insns_skipped to a low value. */
2172
2173 const struct tune_params arm_cortex_a5_tune =
2174 {
2175 &cortexa5_extra_costs,
2176 &generic_addr_mode_costs, /* Addressing mode costs. */
2177 NULL, /* Sched adj cost. */
2178 arm_cortex_a5_branch_cost,
2179 &arm_default_vec_cost,
2180 1, /* Constant limit. */
2181 1, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL,
2185 tune_params::PREF_CONST_POOL_FALSE,
2186 tune_params::PREF_LDRD_FALSE,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_NEITHER,
2190 tune_params::PREF_NEON_64_FALSE,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2194 };
2195
2196 const struct tune_params arm_cortex_a9_tune =
2197 {
2198 &cortexa9_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 cortex_a9_sched_adjust_cost,
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost,
2203 1, /* Constant limit. */
2204 5, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_BENEFICIAL(4,32,32),
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_FALSE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_NEITHER,
2213 tune_params::PREF_NEON_64_FALSE,
2214 tune_params::PREF_NEON_STRINGOPS_FALSE,
2215 tune_params::FUSE_NOTHING,
2216 tune_params::SCHED_AUTOPREF_OFF
2217 };
2218
2219 const struct tune_params arm_cortex_a12_tune =
2220 {
2221 &cortexa12_extra_costs,
2222 &generic_addr_mode_costs, /* Addressing mode costs. */
2223 NULL, /* Sched adj cost. */
2224 arm_default_branch_cost,
2225 &arm_default_vec_cost, /* Vectorizer costs. */
2226 1, /* Constant limit. */
2227 2, /* Max cond insns. */
2228 8, /* Memset max inline. */
2229 2, /* Issue rate. */
2230 ARM_PREFETCH_NOT_BENEFICIAL,
2231 tune_params::PREF_CONST_POOL_FALSE,
2232 tune_params::PREF_LDRD_TRUE,
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2235 tune_params::DISPARAGE_FLAGS_ALL,
2236 tune_params::PREF_NEON_64_FALSE,
2237 tune_params::PREF_NEON_STRINGOPS_TRUE,
2238 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239 tune_params::SCHED_AUTOPREF_OFF
2240 };
2241
2242 const struct tune_params arm_cortex_a73_tune =
2243 {
2244 &cortexa57_extra_costs,
2245 &generic_addr_mode_costs, /* Addressing mode costs. */
2246 NULL, /* Sched adj cost. */
2247 arm_default_branch_cost,
2248 &arm_default_vec_cost, /* Vectorizer costs. */
2249 1, /* Constant limit. */
2250 2, /* Max cond insns. */
2251 8, /* Memset max inline. */
2252 2, /* Issue rate. */
2253 ARM_PREFETCH_NOT_BENEFICIAL,
2254 tune_params::PREF_CONST_POOL_FALSE,
2255 tune_params::PREF_LDRD_TRUE,
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2258 tune_params::DISPARAGE_FLAGS_ALL,
2259 tune_params::PREF_NEON_64_FALSE,
2260 tune_params::PREF_NEON_STRINGOPS_TRUE,
2261 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262 tune_params::SCHED_AUTOPREF_FULL
2263 };
2264
2265 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2266 cycle to execute each. An LDR from the constant pool also takes two cycles
2267 to execute, but mildly increases pipelining opportunity (consecutive
2268 loads/stores can be pipelined together, saving one cycle), and may also
2269 improve icache utilisation. Hence we prefer the constant pool for such
2270 processors. */
2271
2272 const struct tune_params arm_v7m_tune =
2273 {
2274 &v7m_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_cortex_m_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 1, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2293 };
2294
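#if 0
/* Illustrative sketch, not part of the build: the two ways a 32-bit constant
   can be materialised, as discussed in the armv7m tuning comment above.  The
   exact sequence chosen depends on the selected tuning and optimisation
   level; the assembly shown here is only indicative.

     movw    r0, #0x5678         @ two single-cycle MOVW/MOVT instructions
     movt    r0, #0x1234

   versus a constant-pool load:

     ldr     r0, .LC0            @ one load, plus the pooled literal
     ...
   .LC0:
     .word   0x12345678  */
static unsigned int
example_magic_constant (void)
{
  return 0x12345678;
}
#endif
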
2295 /* Cortex-M7 tuning. */
2296
2297 const struct tune_params arm_cortex_m7_tune =
2298 {
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m7_branch_cost,
2303 &arm_default_vec_cost,
2304 0, /* Constant limit. */
2305 1, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 2, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_64_FALSE,
2315 tune_params::PREF_NEON_STRINGOPS_FALSE,
2316 tune_params::FUSE_NOTHING,
2317 tune_params::SCHED_AUTOPREF_OFF
2318 };
2319
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322 cortex-m23. */
2323 const struct tune_params arm_v6m_tune =
2324 {
2325 &generic_extra_costs, /* Insn extra costs. */
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_default_branch_cost,
2329 &arm_default_vec_cost, /* Vectorizer costs. */
2330 1, /* Constant limit. */
2331 5, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_FALSE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_64_FALSE,
2341 tune_params::PREF_NEON_STRINGOPS_FALSE,
2342 tune_params::FUSE_NOTHING,
2343 tune_params::SCHED_AUTOPREF_OFF
2344 };
2345
2346 const struct tune_params arm_fa726te_tune =
2347 {
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 fa726te_sched_adjust_cost,
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost,
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 2, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_TRUE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_64_FALSE,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2367 };
2368
2369 /* Auto-generated CPU, FPU and architecture tables. */
2370 #include "arm-cpu-data.h"
2371
2372 /* The name of the preprocessor macro to define for this architecture. PROFILE
2373 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2374 is thus chosen to be big enough to hold the longest architecture name. */
2375
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2377
2378 /* Supported TLS relocations. */
2379
2380 enum tls_reloc {
2381 TLS_GD32,
2382 TLS_LDM32,
2383 TLS_LDO32,
2384 TLS_IE32,
2385 TLS_LE32,
2386 TLS_DESCSEQ /* GNU scheme */
2387 };
2388
2389 /* The maximum number of insns to be used when loading a constant. */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2392 {
2393 return size_p ? 1 : current_tune->constant_limit;
2394 }
2395
2396 /* Emit an insn that's a simple single-set. Both the operands must be known
2397 to be valid. */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2400 {
2401 return emit_insn (gen_rtx_SET (x, y));
2402 }
2403
2404 /* Return the number of bits set in VALUE. */
2405 static unsigned
2406 bit_count (unsigned long value)
2407 {
2408 unsigned long count = 0;
2409
2410 while (value)
2411 {
2412 count++;
2413 value &= value - 1; /* Clear the least-significant set bit. */
2414 }
2415
2416 return count;
2417 }
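
#if 0
/* Illustrative sketch, not part of the build: each iteration of the loop
   above clears the lowest set bit (value &= value - 1), so the result is
   the population count of VALUE.  */
gcc_assert (bit_count (0x0UL) == 0);
gcc_assert (bit_count (0x90001UL) == 3);        /* Bits 0, 16 and 19.  */
gcc_assert (bit_count (0xffffffffUL) == 32);
#endif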
2418
2419 /* Return the number of bits set in BMAP. */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2422 {
2423 unsigned int count = 0;
2424 unsigned int n = 0;
2425 sbitmap_iterator sbi;
2426
2427 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428 count++;
2429 return count;
2430 }
2431
2432 typedef struct
2433 {
2434 machine_mode mode;
2435 const char *name;
2436 } arm_fixed_mode_set;
2437
2438 /* A small helper for setting fixed-point library libfuncs. */
2439
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 const char *funcname, const char *modename,
2443 int num_suffix)
2444 {
2445 char buffer[50];
2446
2447 if (num_suffix == 0)
2448 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449 else
2450 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2451
2452 set_optab_libfunc (optable, mode, buffer);
2453 }
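
#if 0
/* Illustrative sketch, not part of the build: example calls showing the
   names the helper above constructs; the optab/mode pairings mirror the
   loop in arm_init_libfuncs below.  */
arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3);
  /* Registers "__gnu_ssaddsa3".  */
arm_set_fixed_optab_libfunc (neg_optab, E_QQmode, "neg", "qq", 2);
  /* Registers "__gnu_negqq2".  */
#endif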
2454
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 machine_mode from, const char *funcname,
2458 const char *toname, const char *fromname)
2459 {
2460 char buffer[50];
2461 const char *maybe_suffix_2 = "";
2462
2463 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2464 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467 maybe_suffix_2 = "2";
2468
2469 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 maybe_suffix_2);
2471
2472 set_conv_libfunc (optable, to, from, buffer);
2473 }
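
#if 0
/* Illustrative sketch, not part of the build: example calls showing how the
   "2" suffix selection above plays out.  */
arm_set_fixed_conv_libfunc (fract_optab, E_HQmode, E_QQmode, "fract", "hq", "qq");
  /* Both modes are signed fract modes, so the suffix is added:
     "__gnu_fractqqhq2".  */
arm_set_fixed_conv_libfunc (fract_optab, E_SFmode, E_SQmode, "fract", "sf", "sq");
  /* The destination is a float mode, so no suffix: "__gnu_fractsqsf".  */
#endif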
2474
2475 static GTY(()) rtx speculation_barrier_libfunc;
2476
2477 /* Set up library functions unique to ARM. */
2478 static void
2479 arm_init_libfuncs (void)
2480 {
2481 /* For Linux, we have access to kernel support for atomic operations. */
2482 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2483 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2484
2485 /* There are no special library functions unless we are using the
2486 ARM BPABI. */
2487 if (!TARGET_BPABI)
2488 return;
2489
2490 /* The functions below are described in Section 4 of the "Run-Time
2491 ABI for the ARM architecture", Version 1.0. */
2492
2493 /* Double-precision floating-point arithmetic. Table 2. */
2494 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2495 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2496 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2497 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2498 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2499
2500 /* Double-precision comparisons. Table 3. */
2501 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2502 set_optab_libfunc (ne_optab, DFmode, NULL);
2503 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2504 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2505 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2506 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2507 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2508
2509 /* Single-precision floating-point arithmetic. Table 4. */
2510 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2511 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2512 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2513 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2514 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2515
2516 /* Single-precision comparisons. Table 5. */
2517 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2518 set_optab_libfunc (ne_optab, SFmode, NULL);
2519 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2520 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2521 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2522 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2523 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
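
#if 0
  /* Illustrative sketch, not part of the build: with a soft-float ABI the
     user-level code below is expected to expand into calls to the routines
     registered above (__aeabi_dmul followed by __aeabi_dcmplt).  The
     function and its name are purely for illustration.  */
  static int
  example_scaled_less (double x, double scale, double limit)
  {
    return (x * scale) < limit;
  }
#endif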
2524
2525 /* Floating-point to integer conversions. Table 6. */
2526 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2527 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2528 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2529 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2530 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2531 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2532 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2533 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2534
2535 /* Conversions between floating types. Table 7. */
2536 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2537 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2538
2539 /* Integer to floating-point conversions. Table 8. */
2540 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2541 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2542 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2543 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2544 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2545 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2546 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2547 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2548
2549 /* Long long. Table 9. */
2550 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2551 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2552 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2553 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2554 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2555 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2556 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2557 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2558
2559 /* Integer (32/32->32) division. \S 4.3.1. */
2560 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2561 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2562
2563 /* The divmod functions are designed so that they can be used for
2564 plain division, even though they return both the quotient and the
2565 remainder. The quotient is returned in the usual location (i.e.,
2566 r0 for SImode, {r0, r1} for DImode), just as would be expected
2567 for an ordinary division routine. Because the AAPCS calling
2568 conventions specify that all of { r0, r1, r2, r3 } are
2569 call-clobbered registers, there is no need to tell the compiler
2570 explicitly that those registers are clobbered by these
2571 routines. */
2572 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2573 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2574
2575 /* For SImode division the ABI provides div-without-mod routines,
2576 which are faster. */
2577 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2578 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2579
2580 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2581 divmod libcalls instead. */
2582 set_optab_libfunc (smod_optab, DImode, NULL);
2583 set_optab_libfunc (umod_optab, DImode, NULL);
2584 set_optab_libfunc (smod_optab, SImode, NULL);
2585 set_optab_libfunc (umod_optab, SImode, NULL);
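
#if 0
  /* Illustrative sketch, not part of the build: on an AEABI target without a
     hardware divider, the operations below are expected to become calls to
     the routines registered above.  Function names are for illustration.  */
  static long long
  example_quotient64 (long long n, long long d)
  {
    return n / d;       /* __aeabi_ldivmod; quotient taken from {r0, r1}.  */
  }

  static unsigned int
  example_remainder32 (unsigned int n, unsigned int d)
  {
    return n % d;       /* __aeabi_uidivmod; remainder taken from r1.  */
  }
#endif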
2586
2587 /* Half-precision float operations. The compiler handles all operations
2588 with NULL libfuncs by converting to SFmode.  */
2589 switch (arm_fp16_format)
2590 {
2591 case ARM_FP16_FORMAT_IEEE:
2592 case ARM_FP16_FORMAT_ALTERNATIVE:
2593
2594 /* Conversions. */
2595 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2596 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2597 ? "__gnu_f2h_ieee"
2598 : "__gnu_f2h_alternative"));
2599 set_conv_libfunc (sext_optab, SFmode, HFmode,
2600 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2601 ? "__gnu_h2f_ieee"
2602 : "__gnu_h2f_alternative"));
2603
2604 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2605 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2606 ? "__gnu_d2h_ieee"
2607 : "__gnu_d2h_alternative"));
2608
2609 /* Arithmetic. */
2610 set_optab_libfunc (add_optab, HFmode, NULL);
2611 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2612 set_optab_libfunc (smul_optab, HFmode, NULL);
2613 set_optab_libfunc (neg_optab, HFmode, NULL);
2614 set_optab_libfunc (sub_optab, HFmode, NULL);
2615
2616 /* Comparisons. */
2617 set_optab_libfunc (eq_optab, HFmode, NULL);
2618 set_optab_libfunc (ne_optab, HFmode, NULL);
2619 set_optab_libfunc (lt_optab, HFmode, NULL);
2620 set_optab_libfunc (le_optab, HFmode, NULL);
2621 set_optab_libfunc (ge_optab, HFmode, NULL);
2622 set_optab_libfunc (gt_optab, HFmode, NULL);
2623 set_optab_libfunc (unord_optab, HFmode, NULL);
2624 break;
2625
2626 default:
2627 break;
2628 }
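
#if 0
  /* Illustrative sketch, not part of the build: with -mfp16-format=ieee and
     no half-precision hardware, arithmetic on __fp16 is performed by
     widening to SFmode, so the addition below uses __gnu_h2f_ieee for each
     operand, a single-precision add, then __gnu_f2h_ieee for the result.  */
  static __fp16
  example_half_add (__fp16 a, __fp16 b)
  {
    return a + b;
  }
#endif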
2629
2630 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2631 {
2632 const arm_fixed_mode_set fixed_arith_modes[] =
2633 {
2634 { E_QQmode, "qq" },
2635 { E_UQQmode, "uqq" },
2636 { E_HQmode, "hq" },
2637 { E_UHQmode, "uhq" },
2638 { E_SQmode, "sq" },
2639 { E_USQmode, "usq" },
2640 { E_DQmode, "dq" },
2641 { E_UDQmode, "udq" },
2642 { E_TQmode, "tq" },
2643 { E_UTQmode, "utq" },
2644 { E_HAmode, "ha" },
2645 { E_UHAmode, "uha" },
2646 { E_SAmode, "sa" },
2647 { E_USAmode, "usa" },
2648 { E_DAmode, "da" },
2649 { E_UDAmode, "uda" },
2650 { E_TAmode, "ta" },
2651 { E_UTAmode, "uta" }
2652 };
2653 const arm_fixed_mode_set fixed_conv_modes[] =
2654 {
2655 { E_QQmode, "qq" },
2656 { E_UQQmode, "uqq" },
2657 { E_HQmode, "hq" },
2658 { E_UHQmode, "uhq" },
2659 { E_SQmode, "sq" },
2660 { E_USQmode, "usq" },
2661 { E_DQmode, "dq" },
2662 { E_UDQmode, "udq" },
2663 { E_TQmode, "tq" },
2664 { E_UTQmode, "utq" },
2665 { E_HAmode, "ha" },
2666 { E_UHAmode, "uha" },
2667 { E_SAmode, "sa" },
2668 { E_USAmode, "usa" },
2669 { E_DAmode, "da" },
2670 { E_UDAmode, "uda" },
2671 { E_TAmode, "ta" },
2672 { E_UTAmode, "uta" },
2673 { E_QImode, "qi" },
2674 { E_HImode, "hi" },
2675 { E_SImode, "si" },
2676 { E_DImode, "di" },
2677 { E_TImode, "ti" },
2678 { E_SFmode, "sf" },
2679 { E_DFmode, "df" }
2680 };
2681 unsigned int i, j;
2682
2683 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2684 {
2685 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2686 "add", fixed_arith_modes[i].name, 3);
2687 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2688 "ssadd", fixed_arith_modes[i].name, 3);
2689 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2690 "usadd", fixed_arith_modes[i].name, 3);
2691 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2692 "sub", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2694 "sssub", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2696 "ussub", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2698 "mul", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2700 "ssmul", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2702 "usmul", fixed_arith_modes[i].name, 3);
2703 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2704 "div", fixed_arith_modes[i].name, 3);
2705 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2706 "udiv", fixed_arith_modes[i].name, 3);
2707 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2708 "ssdiv", fixed_arith_modes[i].name, 3);
2709 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2710 "usdiv", fixed_arith_modes[i].name, 3);
2711 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2712 "neg", fixed_arith_modes[i].name, 2);
2713 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2714 "ssneg", fixed_arith_modes[i].name, 2);
2715 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2716 "usneg", fixed_arith_modes[i].name, 2);
2717 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2718 "ashl", fixed_arith_modes[i].name, 3);
2719 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2720 "ashr", fixed_arith_modes[i].name, 3);
2721 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2722 "lshr", fixed_arith_modes[i].name, 3);
2723 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2724 "ssashl", fixed_arith_modes[i].name, 3);
2725 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2726 "usashl", fixed_arith_modes[i].name, 3);
2727 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2728 "cmp", fixed_arith_modes[i].name, 2);
2729 }
2730
2731 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2732 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2733 {
2734 if (i == j
2735 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2736 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2737 continue;
2738
2739 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2740 fixed_conv_modes[j].mode, "fract",
2741 fixed_conv_modes[i].name,
2742 fixed_conv_modes[j].name);
2743 arm_set_fixed_conv_libfunc (satfract_optab,
2744 fixed_conv_modes[i].mode,
2745 fixed_conv_modes[j].mode, "satfract",
2746 fixed_conv_modes[i].name,
2747 fixed_conv_modes[j].name);
2748 arm_set_fixed_conv_libfunc (fractuns_optab,
2749 fixed_conv_modes[i].mode,
2750 fixed_conv_modes[j].mode, "fractuns",
2751 fixed_conv_modes[i].name,
2752 fixed_conv_modes[j].name);
2753 arm_set_fixed_conv_libfunc (satfractuns_optab,
2754 fixed_conv_modes[i].mode,
2755 fixed_conv_modes[j].mode, "satfractuns",
2756 fixed_conv_modes[i].name,
2757 fixed_conv_modes[j].name);
2758 }
2759 }
2760
2761 if (TARGET_AAPCS_BASED)
2762 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2763
2764 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2765 }
2766
2767 /* On AAPCS systems, this is the "struct __va_list". */
2768 static GTY(()) tree va_list_type;
2769
2770 /* Return the type to use as __builtin_va_list. */
2771 static tree
2772 arm_build_builtin_va_list (void)
2773 {
2774 tree va_list_name;
2775 tree ap_field;
2776
2777 if (!TARGET_AAPCS_BASED)
2778 return std_build_builtin_va_list ();
2779
2780 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2781 defined as:
2782
2783 struct __va_list
2784 {
2785 void *__ap;
2786 };
2787
2788 The C Library ABI further reinforces this definition in \S
2789 4.1.
2790
2791 We must follow this definition exactly. The structure tag
2792 name is visible in C++ mangled names, and thus forms a part
2793 of the ABI. The field name may be used by people who
2794 #include <stdarg.h>. */
2795 /* Create the type. */
2796 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2797 /* Give it the required name. */
2798 va_list_name = build_decl (BUILTINS_LOCATION,
2799 TYPE_DECL,
2800 get_identifier ("__va_list"),
2801 va_list_type);
2802 DECL_ARTIFICIAL (va_list_name) = 1;
2803 TYPE_NAME (va_list_type) = va_list_name;
2804 TYPE_STUB_DECL (va_list_type) = va_list_name;
2805 /* Create the __ap field. */
2806 ap_field = build_decl (BUILTINS_LOCATION,
2807 FIELD_DECL,
2808 get_identifier ("__ap"),
2809 ptr_type_node);
2810 DECL_ARTIFICIAL (ap_field) = 1;
2811 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2812 TYPE_FIELDS (va_list_type) = ap_field;
2813 /* Compute its layout. */
2814 layout_type (va_list_type);
2815
2816 return va_list_type;
2817 }
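
#if 0
/* Illustrative sketch, not part of the build: user code only manipulates the
   __ap field above indirectly, through <stdarg.h>, so the AAPCS layout is
   transparent to ordinary variadic functions such as this one.  */
#include <stdarg.h>

static int
example_sum_ints (int count, ...)
{
  va_list ap;            /* struct __va_list { void *__ap; } on AAPCS.  */
  int total = 0;
  va_start (ap, count);
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}
#endif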
2818
2819 /* Return an expression of type "void *" pointing to the next
2820 available argument in a variable-argument list. VALIST is the
2821 user-level va_list object, of type __builtin_va_list. */
2822 static tree
2823 arm_extract_valist_ptr (tree valist)
2824 {
2825 if (TREE_TYPE (valist) == error_mark_node)
2826 return error_mark_node;
2827
2828 /* On an AAPCS target, the pointer is stored within "struct
2829 va_list". */
2830 if (TARGET_AAPCS_BASED)
2831 {
2832 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2833 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2834 valist, ap_field, NULL_TREE);
2835 }
2836
2837 return valist;
2838 }
2839
2840 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2841 static void
2842 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 std_expand_builtin_va_start (valist, nextarg);
2846 }
2847
2848 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2849 static tree
2850 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2851 gimple_seq *post_p)
2852 {
2853 valist = arm_extract_valist_ptr (valist);
2854 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2855 }
2856
2857 /* Check any incompatible options that the user has specified. */
2858 static void
2859 arm_option_check_internal (struct gcc_options *opts)
2860 {
2861 int flags = opts->x_target_flags;
2862
2863 /* iWMMXt and NEON are incompatible. */
2864 if (TARGET_IWMMXT
2865 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2866 error ("iWMMXt and NEON are incompatible");
2867
2868 /* Make sure that the processor choice does not conflict with any of the
2869 other command line choices. */
2870 if (TARGET_ARM_P (flags)
2871 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2872 error ("target CPU does not support ARM mode");
2873
2874 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2875 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2876 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2877
2878 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2879 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2880
2881 /* If this target is normally configured to use APCS frames, warn if they
2882 are turned off and debugging is turned on. */
2883 if (TARGET_ARM_P (flags)
2884 && write_symbols != NO_DEBUG
2885 && !TARGET_APCS_FRAME
2886 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2887 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2888 "debugging");
2889
2890 /* iWMMXt unsupported under Thumb mode. */
2891 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2892 error ("iWMMXt unsupported under Thumb mode");
2893
2894 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2895 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2896
2897 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2898 {
2899 error ("RTP PIC is incompatible with Thumb");
2900 flag_pic = 0;
2901 }
2902
2903 if (target_pure_code || target_slow_flash_data)
2904 {
2905 const char *flag = (target_pure_code ? "-mpure-code" :
2906 "-mslow-flash-data");
2907
2908 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2909 with MOVT. */
2910 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2911 error ("%s only supports non-pic code on M-profile targets with the "
2912 "MOVT instruction", flag);
2913
2914 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2915 -mword-relocations forbids relocation of MOVT/MOVW. */
2916 if (target_word_relocations)
2917 error ("%s incompatible with %<-mword-relocations%>", flag);
2918 }
2919 }
2920
2921 /* Recompute the global settings depending on target attribute options. */
2922
2923 static void
2924 arm_option_params_internal (void)
2925 {
2926 /* If we are not using the default (ARM mode) section anchor offset
2927 ranges, then set the correct ranges now. */
2928 if (TARGET_THUMB1)
2929 {
2930 /* Thumb-1 LDR instructions cannot have negative offsets.
2931 Permissible positive offset ranges are 5-bit (for byte loads),
2932 6-bit (for halfword loads), or 7-bit (for word loads).
2933 Empirical results suggest a 7-bit anchor range gives the best
2934 overall code size. */
2935 targetm.min_anchor_offset = 0;
2936 targetm.max_anchor_offset = 127;
2937 }
2938 else if (TARGET_THUMB2)
2939 {
2940 /* The minimum is set such that the total size of the block
2941 for a particular anchor is 248 + 1 + 4095 bytes, which is
2942 divisible by eight, ensuring natural spacing of anchors. */
2943 targetm.min_anchor_offset = -248;
2944 targetm.max_anchor_offset = 4095;
2945 }
2946 else
2947 {
2948 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2949 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2950 }
2951
2952 /* Increase the number of conditional instructions with -Os. */
2953 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2954
2955 /* For THUMB2, we limit the conditional sequence to one IT block. */
2956 if (TARGET_THUMB2)
2957 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2958 }
2959
2960 /* True if -mflip-thumb should next add an attribute for the default
2961 mode, false if it should next add an attribute for the opposite mode. */
2962 static GTY(()) bool thumb_flipper;
2963
2964 /* Options after initial target override. */
2965 static GTY(()) tree init_optimize;
2966
2967 static void
2968 arm_override_options_after_change_1 (struct gcc_options *opts)
2969 {
2970 /* -falign-functions without argument: supply one. */
2971 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2972 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2973 && opts->x_optimize_size ? "2" : "4";
2974 }
2975
2976 /* Implement targetm.override_options_after_change. */
2977
2978 static void
2979 arm_override_options_after_change (void)
2980 {
2981 arm_configure_build_target (&arm_active_target,
2982 TREE_TARGET_OPTION (target_option_default_node),
2983 &global_options_set, false);
2984
2985 arm_override_options_after_change_1 (&global_options);
2986 }
2987
2988 /* Implement TARGET_OPTION_SAVE. */
2989 static void
2990 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2991 {
2992 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2993 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2994 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2995 }
2996
2997 /* Implement TARGET_OPTION_RESTORE. */
2998 static void
2999 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
3000 {
3001 opts->x_arm_arch_string = ptr->x_arm_arch_string;
3002 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
3003 opts->x_arm_tune_string = ptr->x_arm_tune_string;
3004 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
3005 false);
3006 }
3007
3008 /* Reset options between modes that the user has specified. */
3009 static void
3010 arm_option_override_internal (struct gcc_options *opts,
3011 struct gcc_options *opts_set)
3012 {
3013 arm_override_options_after_change_1 (opts);
3014
3015 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3016 {
3017 /* The default is to enable interworking, so this warning message would
3018 be confusing to users who have just compiled with
3019 eg, -march=armv4. */
3020 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3021 opts->x_target_flags &= ~MASK_INTERWORK;
3022 }
3023
3024 if (TARGET_THUMB_P (opts->x_target_flags)
3025 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3026 {
3027 warning (0, "target CPU does not support THUMB instructions");
3028 opts->x_target_flags &= ~MASK_THUMB;
3029 }
3030
3031 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3032 {
3033 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3034 opts->x_target_flags &= ~MASK_APCS_FRAME;
3035 }
3036
3037 /* Callee super interworking implies thumb interworking. Adding
3038 this to the flags here simplifies the logic elsewhere. */
3039 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3040 opts->x_target_flags |= MASK_INTERWORK;
3041
3042 /* Need to remember initial values so combinations of options like
3043 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3044 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3045
3046 if (! opts_set->x_arm_restrict_it)
3047 opts->x_arm_restrict_it = arm_arch8;
3048
3049 /* ARM execution state and M profile don't have [restrict] IT. */
3050 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3051 opts->x_arm_restrict_it = 0;
3052
3053 /* Enable -munaligned-access by default for
3054 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3055 i.e. Thumb2 and ARM state only.
3056 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3057 - ARMv8 architecture-based processors.
3058
3059 Disable -munaligned-access by default for
3060 - all pre-ARMv6 architecture-based processors
3061 - ARMv6-M architecture-based processors
3062 - ARMv8-M Baseline processors. */
3063
3064 if (! opts_set->x_unaligned_access)
3065 {
3066 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3067 && arm_arch6 && (arm_arch_notm || arm_arch7));
3068 }
3069 else if (opts->x_unaligned_access == 1
3070 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3071 {
3072 warning (0, "target CPU does not support unaligned accesses");
3073 opts->x_unaligned_access = 0;
3074 }
3075
3076 /* Don't warn since it's on by default in -O2. */
3077 if (TARGET_THUMB1_P (opts->x_target_flags))
3078 opts->x_flag_schedule_insns = 0;
3079 else
3080 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3081
3082 /* Disable shrink-wrap when optimizing function for size, since it tends to
3083 generate additional returns. */
3084 if (optimize_function_for_size_p (cfun)
3085 && TARGET_THUMB2_P (opts->x_target_flags))
3086 opts->x_flag_shrink_wrap = false;
3087 else
3088 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3089
3090 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3091 - epilogue_insns - does not accurately model the corresponding insns
3092 emitted in the asm file. In particular, see the comment in thumb_exit
3093 'Find out how many of the (return) argument registers we can corrupt'.
3094 As a consequence, the epilogue may clobber registers without fipa-ra
3095 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3096 TODO: Accurately model clobbers for epilogue_insns and reenable
3097 fipa-ra. */
3098 if (TARGET_THUMB1_P (opts->x_target_flags))
3099 opts->x_flag_ipa_ra = 0;
3100 else
3101 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3102
3103 /* Thumb2 inline assembly code should always use unified syntax.
3104 This will apply to ARM and Thumb1 eventually. */
3105 if (TARGET_THUMB2_P (opts->x_target_flags))
3106 opts->x_inline_asm_unified = true;
3107
3108 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3109 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3110 #endif
3111 }
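
#if 0
/* Illustrative sketch, not part of the build: per-function target attributes
   (and -mflip-thumb) re-enter the option-override path above, which is why
   the initial optimization options are remembered in init_optimize.  */
__attribute__ ((target ("thumb")))
static int example_add_thumb (int a, int b) { return a + b; }

__attribute__ ((target ("arm")))
static int example_add_arm (int a, int b) { return a + b; }
#endif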
3112
3113 static sbitmap isa_all_fpubits;
3114 static sbitmap isa_quirkbits;
3115
3116 /* Configure a build target TARGET from the user-specified options OPTS and
3117 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3118 architecture have been specified, but the two are not identical. */
3119 void
3120 arm_configure_build_target (struct arm_build_target *target,
3121 struct cl_target_option *opts,
3122 struct gcc_options *opts_set,
3123 bool warn_compatible)
3124 {
3125 const cpu_option *arm_selected_tune = NULL;
3126 const arch_option *arm_selected_arch = NULL;
3127 const cpu_option *arm_selected_cpu = NULL;
3128 const arm_fpu_desc *arm_selected_fpu = NULL;
3129 const char *tune_opts = NULL;
3130 const char *arch_opts = NULL;
3131 const char *cpu_opts = NULL;
3132
3133 bitmap_clear (target->isa);
3134 target->core_name = NULL;
3135 target->arch_name = NULL;
3136
3137 if (opts_set->x_arm_arch_string)
3138 {
3139 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3140 "-march",
3141 opts->x_arm_arch_string);
3142 arch_opts = strchr (opts->x_arm_arch_string, '+');
3143 }
3144
3145 if (opts_set->x_arm_cpu_string)
3146 {
3147 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3148 opts->x_arm_cpu_string);
3149 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3150 arm_selected_tune = arm_selected_cpu;
3151 /* If taking the tuning from -mcpu, we don't need to rescan the
3152 options for tuning. */
3153 }
3154
3155 if (opts_set->x_arm_tune_string)
3156 {
3157 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3158 opts->x_arm_tune_string);
3159 tune_opts = strchr (opts->x_arm_tune_string, '+');
3160 }
3161
3162 if (arm_selected_arch)
3163 {
3164 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3165 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3166 arch_opts);
3167
3168 if (arm_selected_cpu)
3169 {
3170 auto_sbitmap cpu_isa (isa_num_bits);
3171 auto_sbitmap isa_delta (isa_num_bits);
3172
3173 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3174 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3175 cpu_opts);
3176 bitmap_xor (isa_delta, cpu_isa, target->isa);
3177 /* Ignore any bits that are quirk bits. */
3178 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3179 /* Ignore (for now) any bits that might be set by -mfpu. */
3180 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3181
3182 if (!bitmap_empty_p (isa_delta))
3183 {
3184 if (warn_compatible)
3185 warning (0, "switch %<-mcpu=%s%> conflicts "
3186 "with %<-march=%s%> switch",
3187 arm_selected_cpu->common.name,
3188 arm_selected_arch->common.name);
3189 /* -march wins for code generation.
3190 -mcpu wins for default tuning. */
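/* For example (illustrative): -mcpu=cortex-m4 together with -march=armv7-a
   leaves a non-empty ISA delta after the quirk and FPU bits are masked out,
   so the warning above is issued, code is generated for armv7-a, and the
   Cortex-M4 tuning is kept unless -mtune says otherwise.  */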
3191 if (!arm_selected_tune)
3192 arm_selected_tune = arm_selected_cpu;
3193
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 }
3197 else
3198 {
3199 /* Architecture and CPU are essentially the same.
3200 Prefer the CPU setting. */
3201 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3202 target->core_name = arm_selected_cpu->common.name;
3203 /* Copy the CPU's capabilities, so that we inherit the
3204 appropriate extensions and quirks. */
3205 bitmap_copy (target->isa, cpu_isa);
3206 }
3207 }
3208 else
3209 {
3210 /* Pick a CPU based on the architecture. */
3211 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3212 target->arch_name = arm_selected_arch->common.name;
3213 /* Note: target->core_name is left unset in this path. */
3214 }
3215 }
3216 else if (arm_selected_cpu)
3217 {
3218 target->core_name = arm_selected_cpu->common.name;
3219 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3220 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3221 cpu_opts);
3222 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3223 }
3224 /* If the user did not specify a processor or architecture, choose
3225 one for them. */
3226 else
3227 {
3228 const cpu_option *sel;
3229 auto_sbitmap sought_isa (isa_num_bits);
3230 bitmap_clear (sought_isa);
3231 auto_sbitmap default_isa (isa_num_bits);
3232
3233 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3234 TARGET_CPU_DEFAULT);
3235 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3236 gcc_assert (arm_selected_cpu->common.name);
3237
3238 /* RWE: All of the selection logic below (to the end of this
3239 'if' clause) looks somewhat suspect. It appears to be mostly
3240 there to support forcing thumb support when the default CPU
3241 does not have thumb (somewhat dubious in terms of what the
3242 user might be expecting). I think it should be removed once
3243 support for the pre-thumb era cores is removed. */
3244 sel = arm_selected_cpu;
3245 arm_initialize_isa (default_isa, sel->common.isa_bits);
3246 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3247 cpu_opts);
3248
3249 /* Now check to see if the user has specified any command line
3250 switches that require certain abilities from the cpu. */
3251
3252 if (TARGET_INTERWORK || TARGET_THUMB)
3253 bitmap_set_bit (sought_isa, isa_bit_thumb);
3254
3255 /* If there are such requirements and the default CPU does not
3256 satisfy them, we need to run over the complete list of
3257 cores looking for one that is satisfactory. */
3258 if (!bitmap_empty_p (sought_isa)
3259 && !bitmap_subset_p (sought_isa, default_isa))
3260 {
3261 auto_sbitmap candidate_isa (isa_num_bits);
3262 /* We're only interested in a CPU with at least the
3263 capabilities of the default CPU and the required
3264 additional features. */
3265 bitmap_ior (default_isa, default_isa, sought_isa);
3266
3267 /* Try to locate a CPU type that supports all of the abilities
3268 of the default CPU, plus the extra abilities requested by
3269 the user. */
3270 for (sel = all_cores; sel->common.name != NULL; sel++)
3271 {
3272 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3273 /* An exact match? */
3274 if (bitmap_equal_p (default_isa, candidate_isa))
3275 break;
3276 }
3277
3278 if (sel->common.name == NULL)
3279 {
3280 unsigned current_bit_count = isa_num_bits;
3281 const cpu_option *best_fit = NULL;
3282
3283 /* Ideally we would like to issue an error message here
3284 saying that it was not possible to find a CPU compatible
3285 with the default CPU, but which also supports the command
3286 line options specified by the programmer, and so they
3287 ought to use the -mcpu=<name> command line option to
3288 override the default CPU type.
3289
3290 If we cannot find a CPU that has exactly the
3291 characteristics of the default CPU and the given
3292 command line options we scan the array again looking
3293 for a best match. The best match must have at least
3294 the capabilities of the perfect match. */
3295 for (sel = all_cores; sel->common.name != NULL; sel++)
3296 {
3297 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3298
3299 if (bitmap_subset_p (default_isa, candidate_isa))
3300 {
3301 unsigned count;
3302
3303 bitmap_and_compl (candidate_isa, candidate_isa,
3304 default_isa);
3305 count = bitmap_popcount (candidate_isa);
3306
3307 if (count < current_bit_count)
3308 {
3309 best_fit = sel;
3310 current_bit_count = count;
3311 }
3312 }
3313
3314 gcc_assert (best_fit);
3315 sel = best_fit;
3316 }
3317 }
3318 arm_selected_cpu = sel;
3319 }
3320
3321 /* Now we know the CPU, we can finally initialize the target
3322 structure. */
3323 target->core_name = arm_selected_cpu->common.name;
3324 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3325 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3326 cpu_opts);
3327 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3328 }
3329
3330 gcc_assert (arm_selected_cpu);
3331 gcc_assert (arm_selected_arch);
3332
3333 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3334 {
3335 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3336 auto_sbitmap fpu_bits (isa_num_bits);
3337
3338 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3339 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3340 bitmap_ior (target->isa, target->isa, fpu_bits);
3341 }
3342
3343 if (!arm_selected_tune)
3344 arm_selected_tune = arm_selected_cpu;
3345 else /* Validate the features passed to -mtune. */
3346 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3347
3348 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3349
3350 /* Finish initializing the target structure. */
3351 target->arch_pp_name = arm_selected_arch->arch;
3352 target->base_arch = arm_selected_arch->base_arch;
3353 target->profile = arm_selected_arch->profile;
3354
3355 target->tune_flags = tune_data->tune_flags;
3356 target->tune = tune_data->tune;
3357 target->tune_core = tune_data->scheduler;
3358 arm_option_reconfigure_globals ();
3359 }
3360
3361 /* Fix up any incompatible options that the user has specified. */
3362 static void
3363 arm_option_override (void)
3364 {
3365 static const enum isa_feature fpu_bitlist[]
3366 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3367 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3368 cl_target_option opts;
3369
3370 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3371 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3372
3373 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3374 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3375
3376 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3377
3378 if (!global_options_set.x_arm_fpu_index)
3379 {
3380 bool ok;
3381 int fpu_index;
3382
3383 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3384 CL_TARGET);
3385 gcc_assert (ok);
3386 arm_fpu_index = (enum fpu_type) fpu_index;
3387 }
3388
3389 cl_target_option_save (&opts, &global_options);
3390 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3391 true);
3392
3393 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3394 SUBTARGET_OVERRIDE_OPTIONS;
3395 #endif
3396
3397 /* Initialize boolean versions of the architectural flags, for use
3398 in the arm.md file and for enabling feature flags. */
3399 arm_option_reconfigure_globals ();
3400
3401 arm_tune = arm_active_target.tune_core;
3402 tune_flags = arm_active_target.tune_flags;
3403 current_tune = arm_active_target.tune;
3404
3405 /* TBD: Dwarf info for apcs frame is not handled yet. */
3406 if (TARGET_APCS_FRAME)
3407 flag_shrink_wrap = false;
3408
3409 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3410 {
3411 warning (0, "%<-mapcs-stack-check%> incompatible with "
3412 "%<-mno-apcs-frame%>");
3413 target_flags |= MASK_APCS_FRAME;
3414 }
3415
3416 if (TARGET_POKE_FUNCTION_NAME)
3417 target_flags |= MASK_APCS_FRAME;
3418
3419 if (TARGET_APCS_REENT && flag_pic)
3420 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3421
3422 if (TARGET_APCS_REENT)
3423 warning (0, "APCS reentrant code not supported. Ignored");
3424
3425 /* Set up some tuning parameters. */
3426 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3427 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3428 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3429 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3430 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3431 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3432
3433 /* For arm2/3 there is no need to do any scheduling if we are doing
3434 software floating-point. */
3435 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3436 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3437
3438 /* Override the default structure alignment for AAPCS ABI. */
3439 if (!global_options_set.x_arm_structure_size_boundary)
3440 {
3441 if (TARGET_AAPCS_BASED)
3442 arm_structure_size_boundary = 8;
3443 }
3444 else
3445 {
3446 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3447
3448 if (arm_structure_size_boundary != 8
3449 && arm_structure_size_boundary != 32
3450 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3451 {
3452 if (ARM_DOUBLEWORD_ALIGN)
3453 warning (0,
3454 "structure size boundary can only be set to 8, 32 or 64");
3455 else
3456 warning (0, "structure size boundary can only be set to 8 or 32");
3457 arm_structure_size_boundary
3458 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3459 }
3460 }
3461
3462 if (TARGET_VXWORKS_RTP)
3463 {
3464 if (!global_options_set.x_arm_pic_data_is_text_relative)
3465 arm_pic_data_is_text_relative = 0;
3466 }
3467 else if (flag_pic
3468 && !arm_pic_data_is_text_relative
3469 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3470 /* When the text and data segments don't have a fixed displacement, the
3471 intended use is with a single, read-only PIC base register.
3472 Unless the user explicitly requested not to do that, set
3473 it. */
3474 target_flags |= MASK_SINGLE_PIC_BASE;
3475
3476 /* If stack checking is disabled, we can use r10 as the PIC register,
3477 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3478 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3479 {
3480 if (TARGET_VXWORKS_RTP)
3481 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3482 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3483 }
3484
3485 if (flag_pic && TARGET_VXWORKS_RTP)
3486 arm_pic_register = 9;
3487
3488 if (arm_pic_register_string != NULL)
3489 {
3490 int pic_register = decode_reg_name (arm_pic_register_string);
3491
3492 if (!flag_pic)
3493 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3494
3495 /* Prevent the user from choosing an obviously stupid PIC register. */
3496 else if (pic_register < 0 || call_used_regs[pic_register]
3497 || pic_register == HARD_FRAME_POINTER_REGNUM
3498 || pic_register == STACK_POINTER_REGNUM
3499 || pic_register >= PC_REGNUM
3500 || (TARGET_VXWORKS_RTP
3501 && (unsigned int) pic_register != arm_pic_register))
3502 error ("unable to use %qs for PIC register", arm_pic_register_string);
3503 else
3504 arm_pic_register = pic_register;
3505 }
3506
3507 if (flag_pic)
3508 target_word_relocations = 1;
3509
3510 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3511 if (fix_cm3_ldrd == 2)
3512 {
3513 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3514 fix_cm3_ldrd = 1;
3515 else
3516 fix_cm3_ldrd = 0;
3517 }
3518
3519 /* Hot/Cold partitioning is not currently supported, since we can't
3520 handle literal pool placement in that case. */
3521 if (flag_reorder_blocks_and_partition)
3522 {
3523 inform (input_location,
3524 "%<-freorder-blocks-and-partition%> not supported "
3525 "on this architecture");
3526 flag_reorder_blocks_and_partition = 0;
3527 flag_reorder_blocks = 1;
3528 }
3529
3530 if (flag_pic)
3531 /* Hoisting PIC address calculations more aggressively provides a small,
3532 but measurable, size reduction for PIC code. Therefore, we decrease
3533 the bar for unrestricted expression hoisting to the cost of PIC address
3534 calculation, which is 2 instructions. */
3535 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3536 global_options.x_param_values,
3537 global_options_set.x_param_values);
3538
3539 /* ARM EABI defaults to strict volatile bitfields. */
3540 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3541 && abi_version_at_least(2))
3542 flag_strict_volatile_bitfields = 1;
3543
3544 /* Enable software prefetching at -O3 for CPUs that have prefetch, and when
3545 we have deemed it beneficial (signified by setting
3546 prefetch.num_slots to 1 or more). */
3547 if (flag_prefetch_loop_arrays < 0
3548 && HAVE_prefetch
3549 && optimize >= 3
3550 && current_tune->prefetch.num_slots > 0)
3551 flag_prefetch_loop_arrays = 1;
3552
3553 /* Set up parameters to be used in the prefetching algorithm. Do not
3554 override the defaults unless we are tuning for a core we have
3555 researched values for. */
3556 if (current_tune->prefetch.num_slots > 0)
3557 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3558 current_tune->prefetch.num_slots,
3559 global_options.x_param_values,
3560 global_options_set.x_param_values);
3561 if (current_tune->prefetch.l1_cache_line_size >= 0)
3562 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3563 current_tune->prefetch.l1_cache_line_size,
3564 global_options.x_param_values,
3565 global_options_set.x_param_values);
3566 if (current_tune->prefetch.l1_cache_size >= 0)
3567 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3568 current_tune->prefetch.l1_cache_size,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571
3572 /* Use Neon to perform 64-bit operations rather than core
3573 registers. */
3574 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3575 if (use_neon_for_64bits == 1)
3576 prefer_neon_for_64bits = true;
3577
3578 /* Use the alternative scheduling-pressure algorithm by default. */
3579 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3580 global_options.x_param_values,
3581 global_options_set.x_param_values);
3582
3583 /* Look through the ready list and all of the queue for instructions
3584 relevant to the L2 auto-prefetcher. */
3585 int param_sched_autopref_queue_depth;
3586
3587 switch (current_tune->sched_autopref)
3588 {
3589 case tune_params::SCHED_AUTOPREF_OFF:
3590 param_sched_autopref_queue_depth = -1;
3591 break;
3592
3593 case tune_params::SCHED_AUTOPREF_RANK:
3594 param_sched_autopref_queue_depth = 0;
3595 break;
3596
3597 case tune_params::SCHED_AUTOPREF_FULL:
3598 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3599 break;
3600
3601 default:
3602 gcc_unreachable ();
3603 }
3604
3605 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3606 param_sched_autopref_queue_depth,
3607 global_options.x_param_values,
3608 global_options_set.x_param_values);
3609
3610 /* Currently, for slow flash data, we just disable literal pools. We also
3611 disable them for pure-code. */
3612 if (target_slow_flash_data || target_pure_code)
3613 arm_disable_literal_pool = true;
3614
3615 /* Disable scheduling fusion by default if the target is not an armv7
3616 processor or does not prefer ldrd/strd. */
3617 if (flag_schedule_fusion == 2
3618 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3619 flag_schedule_fusion = 0;
3620
3621 /* Need to remember the initial options before they are overridden. */
3622 init_optimize = build_optimization_node (&global_options);
3623
3624 arm_options_perform_arch_sanity_checks ();
3625 arm_option_override_internal (&global_options, &global_options_set);
3626 arm_option_check_internal (&global_options);
3627 arm_option_params_internal ();
3628
3629 /* Create the default target_options structure. */
3630 target_option_default_node = target_option_current_node
3631 = build_target_option_node (&global_options);
3632
3633 /* Register global variables with the garbage collector. */
3634 arm_add_gc_roots ();
3635
3636 /* Record the initial mode for testing. */
3637 thumb_flipper = TARGET_THUMB;
3638 }
3639
3640
3641 /* Reconfigure global status flags from the active_target.isa. */
3642 void
3643 arm_option_reconfigure_globals (void)
3644 {
3645 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3646 arm_base_arch = arm_active_target.base_arch;
3647
3648 /* Initialize boolean versions of the architectural flags, for use
3649 in the arm.md file. */
3650 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3651 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3652 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3653 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3654 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3655 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3656 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3657 arm_arch6m = arm_arch6 && !arm_arch_notm;
3658 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3659 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3660 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3661 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3662 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3663 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3664 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3665 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3666 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3667 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3668 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3669 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3670 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3671 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3672 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3673 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3674 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3675 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3676 if (arm_fp16_inst)
3677 {
3678 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3679 error ("selected fp16 options are incompatible");
3680 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3681 }
3682
3683 /* And finally, set up some quirks. */
3684 arm_arch_no_volatile_ce
3685 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3686 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3687 isa_bit_quirk_armv6kz);
3688
3689 /* Use the cp15 method if it is available. */
3690 if (target_thread_pointer == TP_AUTO)
3691 {
3692 if (arm_arch6k && !TARGET_THUMB1)
3693 target_thread_pointer = TP_CP15;
3694 else
3695 target_thread_pointer = TP_SOFT;
3696 }
3697 }
3698
3699 /* Perform some validation of the desired architecture against the rest of the
3700 options. */
3701 void
3702 arm_options_perform_arch_sanity_checks (void)
3703 {
3704 /* V5T code we generate is completely interworking capable, so we turn off
3705 TARGET_INTERWORK here to avoid many tests later on. */
3706
3707 /* XXX However, we must pass the right pre-processor defines to CPP
3708 or GLD can get confused. This is a hack. */
3709 if (TARGET_INTERWORK)
3710 arm_cpp_interwork = 1;
3711
3712 if (arm_arch5t)
3713 target_flags &= ~MASK_INTERWORK;
3714
3715 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3716 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3717
3718 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3719 error ("iwmmxt abi requires an iwmmxt capable cpu");
3720
3721 /* BPABI targets use linker tricks to allow interworking on cores
3722 without thumb support. */
3723 if (TARGET_INTERWORK
3724 && !TARGET_BPABI
3725 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3726 {
3727 warning (0, "target CPU does not support interworking" );
3728 target_flags &= ~MASK_INTERWORK;
3729 }
3730
3731 /* If soft-float is specified then don't use FPU. */
3732 if (TARGET_SOFT_FLOAT)
3733 arm_fpu_attr = FPU_NONE;
3734 else
3735 arm_fpu_attr = FPU_VFP;
3736
3737 if (TARGET_AAPCS_BASED)
3738 {
3739 if (TARGET_CALLER_INTERWORKING)
3740 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3741 else
3742 if (TARGET_CALLEE_INTERWORKING)
3743 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3744 }
3745
3746 /* __fp16 support currently assumes the core has ldrh. */
3747 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3748 sorry ("__fp16 and no ldrh");
3749
3750 if (use_cmse && !arm_arch_cmse)
3751 error ("target CPU does not support ARMv8-M Security Extensions");
3752
3753 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3754 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3755 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3756 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3757
3758
3759 if (TARGET_AAPCS_BASED)
3760 {
3761 if (arm_abi == ARM_ABI_IWMMXT)
3762 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3763 else if (TARGET_HARD_FLOAT_ABI)
3764 {
3765 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3766 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3767 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3768 }
3769 else
3770 arm_pcs_default = ARM_PCS_AAPCS;
3771 }
3772 else
3773 {
3774 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3775 sorry ("%<-mfloat-abi=hard%> and VFP");
3776
3777 if (arm_abi == ARM_ABI_APCS)
3778 arm_pcs_default = ARM_PCS_APCS;
3779 else
3780 arm_pcs_default = ARM_PCS_ATPCS;
3781 }
3782 }
3783
3784 static void
3785 arm_add_gc_roots (void)
3786 {
3787 gcc_obstack_init(&minipool_obstack);
3788 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3789 }
3790 \f
3791 /* A table of known ARM exception types.
3792 For use with the interrupt function attribute. */
3793
3794 typedef struct
3795 {
3796 const char *const arg;
3797 const unsigned long return_value;
3798 }
3799 isr_attribute_arg;
3800
3801 static const isr_attribute_arg isr_attribute_args [] =
3802 {
3803 { "IRQ", ARM_FT_ISR },
3804 { "irq", ARM_FT_ISR },
3805 { "FIQ", ARM_FT_FIQ },
3806 { "fiq", ARM_FT_FIQ },
3807 { "ABORT", ARM_FT_ISR },
3808 { "abort", ARM_FT_ISR },
3809 { "ABORT", ARM_FT_ISR },
3810 { "abort", ARM_FT_ISR },
3811 { "UNDEF", ARM_FT_EXCEPTION },
3812 { "undef", ARM_FT_EXCEPTION },
3813 { "SWI", ARM_FT_EXCEPTION },
3814 { "swi", ARM_FT_EXCEPTION },
3815 { NULL, ARM_FT_NORMAL }
3816 };
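/* For illustration, a handler selecting one of the entries above might be
   declared as (example name only):

     void __attribute__ ((interrupt ("IRQ"))) uart_irq_handler (void);

   The string argument is compared case-sensitively by arm_isr_value below,
   which is why both upper- and lower-case spellings appear in the table.  */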
3817
3818 /* Returns the (interrupt) function type of the current
3819 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3820
3821 static unsigned long
3822 arm_isr_value (tree argument)
3823 {
3824 const isr_attribute_arg * ptr;
3825 const char * arg;
3826
3827 if (!arm_arch_notm)
3828 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3829
3830 /* No argument - default to IRQ. */
3831 if (argument == NULL_TREE)
3832 return ARM_FT_ISR;
3833
3834 /* Get the value of the argument. */
3835 if (TREE_VALUE (argument) == NULL_TREE
3836 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3837 return ARM_FT_UNKNOWN;
3838
3839 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3840
3841 /* Check it against the list of known arguments. */
3842 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3843 if (streq (arg, ptr->arg))
3844 return ptr->return_value;
3845
3846 /* An unrecognized interrupt type. */
3847 return ARM_FT_UNKNOWN;
3848 }
3849
3850 /* Computes the type of the current function. */
3851
3852 static unsigned long
3853 arm_compute_func_type (void)
3854 {
3855 unsigned long type = ARM_FT_UNKNOWN;
3856 tree a;
3857 tree attr;
3858
3859 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3860
3861 /* Decide if the current function is volatile. Such functions
3862 never return, and many memory cycles can be saved by not storing
3863 register values that will never be needed again. This optimization
3864 was added to speed up context switching in a kernel application. */
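/* For example, a declaration such as (illustrative only)

     void fatal_error (const char *msg) __attribute__ ((noreturn));

   is marked TREE_THIS_VOLATILE, so, subject to the exception and unwind
   checks below, its prologue can skip saving registers that would only be
   needed on return.  */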
3865 if (optimize > 0
3866 && (TREE_NOTHROW (current_function_decl)
3867 || !(flag_unwind_tables
3868 || (flag_exceptions
3869 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3870 && TREE_THIS_VOLATILE (current_function_decl))
3871 type |= ARM_FT_VOLATILE;
3872
3873 if (cfun->static_chain_decl != NULL)
3874 type |= ARM_FT_NESTED;
3875
3876 attr = DECL_ATTRIBUTES (current_function_decl);
3877
3878 a = lookup_attribute ("naked", attr);
3879 if (a != NULL_TREE)
3880 type |= ARM_FT_NAKED;
3881
3882 a = lookup_attribute ("isr", attr);
3883 if (a == NULL_TREE)
3884 a = lookup_attribute ("interrupt", attr);
3885
3886 if (a == NULL_TREE)
3887 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3888 else
3889 type |= arm_isr_value (TREE_VALUE (a));
3890
3891 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3892 type |= ARM_FT_CMSE_ENTRY;
3893
3894 return type;
3895 }
3896
3897 /* Returns the type of the current function. */
3898
3899 unsigned long
3900 arm_current_func_type (void)
3901 {
3902 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3903 cfun->machine->func_type = arm_compute_func_type ();
3904
3905 return cfun->machine->func_type;
3906 }
3907
3908 bool
3909 arm_allocate_stack_slots_for_args (void)
3910 {
3911 /* Naked functions should not allocate stack slots for arguments. */
3912 return !IS_NAKED (arm_current_func_type ());
3913 }
3914
3915 static bool
3916 arm_warn_func_return (tree decl)
3917 {
3918 /* Naked functions are implemented entirely in assembly, including the
3919 return sequence, so suppress warnings about this. */
3920 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3921 }
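/* For illustration (example body only), a function such as

     void __attribute__ ((naked)) reset_handler (void)
     {
       __asm__ ("b   main");
     }

   provides its own prologue, epilogue and return sequence in inline asm,
   which is why the two checks above exempt naked functions from argument
   stack slots and from missing-return warnings.  */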
3922
3923 \f
3924 /* Output assembler code for a block containing the constant parts
3925 of a trampoline, leaving space for the variable parts.
3926
3927 On the ARM (if r8 is the static chain regnum, and remembering that
3928 referencing pc adds an offset of 8), the trampoline looks like:
3929 ldr r8, [pc, #0]
3930 ldr pc, [pc]
3931 .word static chain value
3932 .word function's address
3933 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3934
3935 static void
3936 arm_asm_trampoline_template (FILE *f)
3937 {
3938 fprintf (f, "\t.syntax unified\n");
3939
3940 if (TARGET_ARM)
3941 {
3942 fprintf (f, "\t.arm\n");
3943 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3944 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3945 }
3946 else if (TARGET_THUMB2)
3947 {
3948 fprintf (f, "\t.thumb\n");
3949 /* The Thumb-2 trampoline is similar to the arm implementation.
3950 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3951 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3952 STATIC_CHAIN_REGNUM, PC_REGNUM);
3953 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3954 }
3955 else
3956 {
3957 ASM_OUTPUT_ALIGN (f, 2);
3958 fprintf (f, "\t.code\t16\n");
3959 fprintf (f, ".Ltrampoline_start:\n");
3960 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3961 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3962 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3963 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3964 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3965 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3966 }
3967 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3968 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3969 }
3970
3971 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3972
3973 static void
3974 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3975 {
3976 rtx fnaddr, mem, a_tramp;
3977
3978 emit_block_move (m_tramp, assemble_trampoline_template (),
3979 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3980
3981 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3982 emit_move_insn (mem, chain_value);
3983
3984 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3985 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3986 emit_move_insn (mem, fnaddr);
3987
3988 a_tramp = XEXP (m_tramp, 0);
3989 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3990 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3991 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3992 }
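/* Trampolines are only needed when the address of a nested function that
   uses its static chain escapes, e.g. with the GNU C nested-function
   extension (illustrative only):

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;
       return fp (1);
     }

   Taking inner's address forces the template above to be copied to the
   stack and patched with the static chain value and inner's address; the
   __clear_cache call keeps the instruction cache coherent with the freshly
   written code.  */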
3993
3994 /* Thumb trampolines should be entered in thumb mode, so set
3995 the bottom bit of the address. */
3996
3997 static rtx
3998 arm_trampoline_adjust_address (rtx addr)
3999 {
4000 if (TARGET_THUMB)
4001 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4002 NULL, 0, OPTAB_LIB_WIDEN);
4003 return addr;
4004 }
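/* E.g. a trampoline written at 0x20001000 is handed out as 0x20001001 on
   Thumb targets, so an indirect BX/BLX through the pointer enters the stub
   in Thumb state (the address value is illustrative).  */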
4005 \f
4006 /* Return 1 if it is possible to return using a single instruction.
4007 If SIBLING is non-null, this is a test for a return before a sibling
4008 call. SIBLING is the call insn, so we can examine its register usage. */
4009
4010 int
4011 use_return_insn (int iscond, rtx sibling)
4012 {
4013 int regno;
4014 unsigned int func_type;
4015 unsigned long saved_int_regs;
4016 unsigned HOST_WIDE_INT stack_adjust;
4017 arm_stack_offsets *offsets;
4018
4019 /* Never use a return instruction before reload has run. */
4020 if (!reload_completed)
4021 return 0;
4022
4023 func_type = arm_current_func_type ();
4024
4025 /* Naked, volatile and stack alignment functions need special
4026 consideration. */
4027 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4028 return 0;
4029
4030 /* So do interrupt functions that use the frame pointer and Thumb
4031 interrupt functions. */
4032 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4033 return 0;
4034
4035 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4036 && !optimize_function_for_size_p (cfun))
4037 return 0;
4038
4039 offsets = arm_get_frame_offsets ();
4040 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4041
4042 /* As do variadic functions. */
4043 if (crtl->args.pretend_args_size
4044 || cfun->machine->uses_anonymous_args
4045 /* Or if the function calls __builtin_eh_return () */
4046 || crtl->calls_eh_return
4047 /* Or if the function calls alloca */
4048 || cfun->calls_alloca
4049 /* Or if there is a stack adjustment. However, if the stack pointer
4050 is saved on the stack, we can use a pre-incrementing stack load. */
4051 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4052 && stack_adjust == 4))
4053 /* Or if the static chain register was saved above the frame, under the
4054 assumption that the stack pointer isn't saved on the stack. */
4055 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4056 && arm_compute_static_chain_stack_bytes() != 0))
4057 return 0;
4058
4059 saved_int_regs = offsets->saved_regs_mask;
4060
4061 /* Unfortunately, the insn
4062
4063 ldmib sp, {..., sp, ...}
4064
4065 triggers a bug on most SA-110 based devices, such that the stack
4066 pointer won't be correctly restored if the instruction takes a
4067 page fault. We work around this problem by popping r3 along with
4068 the other registers, since that is never slower than executing
4069 another instruction.
4070
4071 We test for !arm_arch5t here, because code for any architecture
4072 less than this could potentially be run on one of the buggy
4073 chips. */
4074 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4075 {
4076 /* Validate that r3 is a call-clobbered register (always true in
4077 the default ABI) ... */
4078 if (!call_used_regs[3])
4079 return 0;
4080
4081 /* ... that it isn't being used for a return value ... */
4082 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4083 return 0;
4084
4085 /* ... or for a tail-call argument ... */
4086 if (sibling)
4087 {
4088 gcc_assert (CALL_P (sibling));
4089
4090 if (find_regno_fusage (sibling, USE, 3))
4091 return 0;
4092 }
4093
4094 /* ... and that there are no call-saved registers in r0-r2
4095 (always true in the default ABI). */
4096 if (saved_int_regs & 0x7)
4097 return 0;
4098 }
4099
4100 /* Can't be done if interworking with Thumb, and any registers have been
4101 stacked. */
4102 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4103 return 0;
4104
4105 /* On StrongARM, conditional returns are expensive if they aren't
4106 taken and multiple registers have been stacked. */
4107 if (iscond && arm_tune_strongarm)
4108 {
4109 /* Conditional return when just the LR is stored is a simple
4110 conditional-load instruction; that's not expensive. */
4111 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4112 return 0;
4113
4114 if (flag_pic
4115 && arm_pic_register != INVALID_REGNUM
4116 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4117 return 0;
4118 }
4119
4120 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4121 several instructions if anything needs to be popped. */
4122 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4123 return 0;
4124
4125 /* If there are saved registers but the LR isn't saved, then we need
4126 two instructions for the return. */
4127 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4128 return 0;
4129
4130 /* Can't be done if any of the VFP regs are pushed,
4131 since this also requires an insn. */
4132 if (TARGET_HARD_FLOAT)
4133 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4134 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4135 return 0;
4136
4137 if (TARGET_REALLY_IWMMXT)
4138 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4139 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4140 return 0;
4141
4142 return 1;
4143 }
4144
4145 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4146 shrink-wrapping if possible. This is the case if we need to emit a
4147 prologue, which we can test by looking at the offsets. */
4148 bool
4149 use_simple_return_p (void)
4150 {
4151 arm_stack_offsets *offsets;
4152
4153 /* Note this function can be called before or after reload. */
4154 if (!reload_completed)
4155 arm_compute_frame_layout ();
4156
4157 offsets = arm_get_frame_offsets ();
4158 return offsets->outgoing_args != 0;
4159 }
4160
4161 /* Return TRUE if int I is a valid immediate ARM constant. */
4162
4163 int
4164 const_ok_for_arm (HOST_WIDE_INT i)
4165 {
4166 int lowbit;
4167
4168 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4169 be all zero, or all one. */
4170 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4171 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4172 != ((~(unsigned HOST_WIDE_INT) 0)
4173 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4174 return FALSE;
4175
4176 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4177
4178 /* Fast return for 0 and small values. We must do this for zero, since
4179 the code below can't handle that one case. */
4180 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4181 return TRUE;
4182
4183 /* Get the number of trailing zeros. */
4184 lowbit = ffs((int) i) - 1;
4185
4186 /* Only even shifts are allowed in ARM mode, so round down to the
4187 nearest even number. */
4188 if (TARGET_ARM)
4189 lowbit &= ~1;
4190
4191 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4192 return TRUE;
4193
4194 if (TARGET_ARM)
4195 {
4196 /* Allow rotated constants in ARM mode. */
4197 if (lowbit <= 4
4198 && ((i & ~0xc000003f) == 0
4199 || (i & ~0xf000000f) == 0
4200 || (i & ~0xfc000003) == 0))
4201 return TRUE;
4202 }
4203 else if (TARGET_THUMB2)
4204 {
4205 HOST_WIDE_INT v;
4206
4207 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4208 v = i & 0xff;
4209 v |= v << 16;
4210 if (i == v || i == (v | (v << 8)))
4211 return TRUE;
4212
4213 /* Allow repeated pattern 0xXY00XY00. */
4214 v = i & 0xff00;
4215 v |= v << 16;
4216 if (i == v)
4217 return TRUE;
4218 }
4219 else if (TARGET_HAVE_MOVT)
4220 {
4221 /* Thumb-1 targets with MOVT. */
4222 if (i > 0xffff)
4223 return FALSE;
4224 else
4225 return TRUE;
4226 }
4227
4228 return FALSE;
4229 }
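/* A rough standalone sketch of the ARM-mode rule above (for illustration,
   not used by GCC): a 32-bit value is a valid ARM immediate iff it is an
   8-bit value rotated right by an even amount, i.e. rotating X left by each
   even amount must eventually leave a value that fits in 8 bits:

     static int arm_immediate_p (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           unsigned int v = rot ? (x << rot) | (x >> (32 - rot)) : x;
           if (v <= 0xff)
             return 1;
         }
       return 0;
     }

   So 0x0000ab00 and 0xc0000034 are valid, while 0x00012345 is not; Thumb-2
   additionally accepts the replicated byte patterns checked above.  */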
4230
4231 /* Return true if I is a valid constant for the operation CODE. */
4232 int
4233 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4234 {
4235 if (const_ok_for_arm (i))
4236 return 1;
4237
4238 switch (code)
4239 {
4240 case SET:
4241 /* See if we can use movw. */
4242 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4243 return 1;
4244 else
4245 /* Otherwise, try mvn. */
4246 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4247
4248 case PLUS:
4249 /* See if we can use addw or subw. */
4250 if (TARGET_THUMB2
4251 && ((i & 0xfffff000) == 0
4252 || ((-i) & 0xfffff000) == 0))
4253 return 1;
4254 /* Fall through. */
4255 case COMPARE:
4256 case EQ:
4257 case NE:
4258 case GT:
4259 case LE:
4260 case LT:
4261 case GE:
4262 case GEU:
4263 case LTU:
4264 case GTU:
4265 case LEU:
4266 case UNORDERED:
4267 case ORDERED:
4268 case UNEQ:
4269 case UNGE:
4270 case UNLT:
4271 case UNGT:
4272 case UNLE:
4273 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4274
4275 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4276 case XOR:
4277 return 0;
4278
4279 case IOR:
4280 if (TARGET_THUMB2)
4281 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4282 return 0;
4283
4284 case AND:
4285 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4286
4287 default:
4288 gcc_unreachable ();
4289 }
4290 }
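/* For example, with CODE == PLUS the constant -1 is accepted even though it
   is not a valid ARM immediate itself: its negation 1 is, so the addition
   can be emitted as a subtraction (and on Thumb-2 the addw/subw check above
   also covers 12-bit constants).  */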
4291
4292 /* Return true if I is a valid di mode constant for the operation CODE. */
4293 int
4294 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4295 {
4296 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4297 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4298 rtx hi = GEN_INT (hi_val);
4299 rtx lo = GEN_INT (lo_val);
4300
4301 if (TARGET_THUMB1)
4302 return 0;
4303
4304 switch (code)
4305 {
4306 case AND:
4307 case IOR:
4308 case XOR:
4309 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4310 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4311 case PLUS:
4312 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4313
4314 default:
4315 return 0;
4316 }
4317 }
4318
4319 /* Emit a sequence of insns to handle a large constant.
4320 CODE is the code of the operation required, it can be any of SET, PLUS,
4321 IOR, AND, XOR, MINUS;
4322 MODE is the mode in which the operation is being performed;
4323 VAL is the integer to operate on;
4324 SOURCE is the other operand (a register, or a null-pointer for SET);
4325 SUBTARGETS means it is safe to create scratch registers if that will
4326 either produce a simpler sequence, or we will want to cse the values.
4327 Return value is the number of insns emitted. */
4328
4329 /* ??? Tweak this for thumb2. */
4330 int
4331 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4332 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4333 {
4334 rtx cond;
4335
4336 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4337 cond = COND_EXEC_TEST (PATTERN (insn));
4338 else
4339 cond = NULL_RTX;
4340
4341 if (subtargets || code == SET
4342 || (REG_P (target) && REG_P (source)
4343 && REGNO (target) != REGNO (source)))
4344 {
4345 /* After arm_reorg has been called, we can't fix up expensive
4346 constants by pushing them into memory so we must synthesize
4347 them in-line, regardless of the cost. This is only likely to
4348 be more costly on chips that have load delay slots and we are
4349 compiling without running the scheduler (so no splitting
4350 occurred before the final instruction emission).
4351
4352 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4353 */
4354 if (!cfun->machine->after_arm_reorg
4355 && !cond
4356 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4357 1, 0)
4358 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4359 + (code != SET))))
4360 {
4361 if (code == SET)
4362 {
4363 /* Currently SET is the only monadic value for CODE; all
4364 the rest are dyadic. */
4365 if (TARGET_USE_MOVT)
4366 arm_emit_movpair (target, GEN_INT (val));
4367 else
4368 emit_set_insn (target, GEN_INT (val));
4369
4370 return 1;
4371 }
4372 else
4373 {
4374 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4375
4376 if (TARGET_USE_MOVT)
4377 arm_emit_movpair (temp, GEN_INT (val));
4378 else
4379 emit_set_insn (temp, GEN_INT (val));
4380
4381 /* For MINUS, the constant is the minuend (SOURCE is subtracted
4382 from it), since we never have subtraction of a constant. */
4383 if (code == MINUS)
4384 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4385 else
4386 emit_set_insn (target,
4387 gen_rtx_fmt_ee (code, mode, source, temp));
4388 return 2;
4389 }
4390 }
4391 }
4392
4393 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4394 1);
4395 }
4396
4397 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4398 ARM/THUMB2 immediates and add up to VAL.
4399 The function return value gives the number of insns required. */
4400 static int
4401 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4402 struct four_ints *return_sequence)
4403 {
4404 int best_consecutive_zeros = 0;
4405 int i;
4406 int best_start = 0;
4407 int insns1, insns2;
4408 struct four_ints tmp_sequence;
4409
4410 /* If we aren't targeting ARM, the best place to start is always at
4411 the bottom; otherwise look more closely. */
4412 if (TARGET_ARM)
4413 {
4414 for (i = 0; i < 32; i += 2)
4415 {
4416 int consecutive_zeros = 0;
4417
4418 if (!(val & (3 << i)))
4419 {
4420 while ((i < 32) && !(val & (3 << i)))
4421 {
4422 consecutive_zeros += 2;
4423 i += 2;
4424 }
4425 if (consecutive_zeros > best_consecutive_zeros)
4426 {
4427 best_consecutive_zeros = consecutive_zeros;
4428 best_start = i - consecutive_zeros;
4429 }
4430 i -= 2;
4431 }
4432 }
4433 }
4434
4435 /* So long as it won't require any more insns to do so, it's
4436 desirable to emit a small constant (in bits 0...9) in the last
4437 insn. This way there is more chance that it can be combined with
4438 a later addressing insn to form a pre-indexed load or store
4439 operation. Consider:
4440
4441 *((volatile int *)0xe0000100) = 1;
4442 *((volatile int *)0xe0000110) = 2;
4443
4444 We want this to wind up as:
4445
4446 mov rA, #0xe0000000
4447 mov rB, #1
4448 str rB, [rA, #0x100]
4449 mov rB, #2
4450 str rB, [rA, #0x110]
4451
4452 rather than having to synthesize both large constants from scratch.
4453
4454 Therefore, we calculate how many insns would be required to emit
4455 the constant starting from `best_start', and also starting from
4456 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4457 yield a shorter sequence, we may as well use zero. */
4458 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4459 if (best_start != 0
4460 && ((HOST_WIDE_INT_1U << best_start) < val))
4461 {
4462 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4463 if (insns2 <= insns1)
4464 {
4465 *return_sequence = tmp_sequence;
4466 insns1 = insns2;
4467 }
4468 }
4469
4470 return insns1;
4471 }
4472
4473 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4474 static int
4475 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4476 struct four_ints *return_sequence, int i)
4477 {
4478 int remainder = val & 0xffffffff;
4479 int insns = 0;
4480
4481 /* Try to find a way of doing the job in either two or three
4482 instructions.
4483
4484 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4485 location. We start at position I. This may be the MSB, or
4486 optimal_immediate_sequence may have positioned it at the largest block
4487 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4488 wrapping around to the top of the word when we drop off the bottom.
4489 In the worst case this code should produce no more than four insns.
4490
4491 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4492 constants, shifted to any arbitrary location. We should always start
4493 at the MSB. */
4494 do
4495 {
4496 int end;
4497 unsigned int b1, b2, b3, b4;
4498 unsigned HOST_WIDE_INT result;
4499 int loc;
4500
4501 gcc_assert (insns < 4);
4502
4503 if (i <= 0)
4504 i += 32;
4505
4506 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4507 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4508 {
4509 loc = i;
4510 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4511 /* We can use addw/subw for the last 12 bits. */
4512 result = remainder;
4513 else
4514 {
4515 /* Use an 8-bit shifted/rotated immediate. */
4516 end = i - 8;
4517 if (end < 0)
4518 end += 32;
4519 result = remainder & ((0x0ff << end)
4520 | ((i < end) ? (0xff >> (32 - end))
4521 : 0));
4522 i -= 8;
4523 }
4524 }
4525 else
4526 {
4527 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4528 arbitrary shifts. */
4529 i -= TARGET_ARM ? 2 : 1;
4530 continue;
4531 }
4532
4533 /* Next, see if we can do a better job with a thumb2 replicated
4534 constant.
4535
4536 We do it this way around to catch the cases like 0x01F001E0 where
4537 two 8-bit immediates would work, but a replicated constant would
4538 make it worse.
4539
4540 TODO: 16-bit constants that don't clear all the bits, but still win.
4541 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4542 if (TARGET_THUMB2)
4543 {
4544 b1 = (remainder & 0xff000000) >> 24;
4545 b2 = (remainder & 0x00ff0000) >> 16;
4546 b3 = (remainder & 0x0000ff00) >> 8;
4547 b4 = remainder & 0xff;
4548
4549 if (loc > 24)
4550 {
4551 /* The 8-bit immediate already found clears b1 (and maybe b2),
4552 but must leave b3 and b4 alone. */
4553
4554 /* First try to find a 32-bit replicated constant that clears
4555 almost everything. We can assume that we can't do it in one,
4556 or else we wouldn't be here. */
4557 unsigned int tmp = b1 & b2 & b3 & b4;
4558 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4559 + (tmp << 24);
4560 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4561 + (tmp == b3) + (tmp == b4);
4562 if (tmp
4563 && (matching_bytes >= 3
4564 || (matching_bytes == 2
4565 && const_ok_for_op (remainder & ~tmp2, code))))
4566 {
4567 /* At least 3 of the bytes match, and the fourth has at
4568 least as many bits set, or two of the bytes match
4569 and it will only require one more insn to finish. */
4570 result = tmp2;
4571 i = tmp != b1 ? 32
4572 : tmp != b2 ? 24
4573 : tmp != b3 ? 16
4574 : 8;
4575 }
4576
4577 /* Second, try to find a 16-bit replicated constant that can
4578 leave three of the bytes clear. If b2 or b4 is already
4579 zero, then we can. If the 8-bit immediate from above would not
4580 clear b2 anyway, then we still win. */
4581 else if (b1 == b3 && (!b2 || !b4
4582 || (remainder & 0x00ff0000 & ~result)))
4583 {
4584 result = remainder & 0xff00ff00;
4585 i = 24;
4586 }
4587 }
4588 else if (loc > 16)
4589 {
4590 /* The 8-bit immediate already found clears b2 (and maybe b3)
4591 and we don't get here unless b1 is already clear, but it will
4592 leave b4 unchanged. */
4593
4594 /* If we can clear b2 and b4 at once, then we win, since the
4595 8-bit immediate couldn't possibly reach that far. */
4596 if (b2 == b4)
4597 {
4598 result = remainder & 0x00ff00ff;
4599 i = 16;
4600 }
4601 }
4602 }
4603
4604 return_sequence->i[insns++] = result;
4605 remainder &= ~result;
4606
4607 if (code == SET || code == MINUS)
4608 code = PLUS;
4609 }
4610 while (remainder);
4611
4612 return insns;
4613 }
4614
4615 /* Emit an instruction with the indicated PATTERN. If COND is
4616 non-NULL, conditionalize the execution of the instruction on COND
4617 being true. */
4618
4619 static void
4620 emit_constant_insn (rtx cond, rtx pattern)
4621 {
4622 if (cond)
4623 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4624 emit_insn (pattern);
4625 }
4626
4627 /* As above, but extra parameter GENERATE which, if clear, suppresses
4628 RTL generation. */
4629
4630 static int
4631 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4632 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4633 int subtargets, int generate)
4634 {
4635 int can_invert = 0;
4636 int can_negate = 0;
4637 int final_invert = 0;
4638 int i;
4639 int set_sign_bit_copies = 0;
4640 int clear_sign_bit_copies = 0;
4641 int clear_zero_bit_copies = 0;
4642 int set_zero_bit_copies = 0;
4643 int insns = 0, neg_insns, inv_insns;
4644 unsigned HOST_WIDE_INT temp1, temp2;
4645 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4646 struct four_ints *immediates;
4647 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4648
4649 /* Find out which operations are safe for a given CODE. Also do a quick
4650 check for degenerate cases; these can occur when DImode operations
4651 are split. */
4652 switch (code)
4653 {
4654 case SET:
4655 can_invert = 1;
4656 break;
4657
4658 case PLUS:
4659 can_negate = 1;
4660 break;
4661
4662 case IOR:
4663 if (remainder == 0xffffffff)
4664 {
4665 if (generate)
4666 emit_constant_insn (cond,
4667 gen_rtx_SET (target,
4668 GEN_INT (ARM_SIGN_EXTEND (val))));
4669 return 1;
4670 }
4671
4672 if (remainder == 0)
4673 {
4674 if (reload_completed && rtx_equal_p (target, source))
4675 return 0;
4676
4677 if (generate)
4678 emit_constant_insn (cond, gen_rtx_SET (target, source));
4679 return 1;
4680 }
4681 break;
4682
4683 case AND:
4684 if (remainder == 0)
4685 {
4686 if (generate)
4687 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4688 return 1;
4689 }
4690 if (remainder == 0xffffffff)
4691 {
4692 if (reload_completed && rtx_equal_p (target, source))
4693 return 0;
4694 if (generate)
4695 emit_constant_insn (cond, gen_rtx_SET (target, source));
4696 return 1;
4697 }
4698 can_invert = 1;
4699 break;
4700
4701 case XOR:
4702 if (remainder == 0)
4703 {
4704 if (reload_completed && rtx_equal_p (target, source))
4705 return 0;
4706 if (generate)
4707 emit_constant_insn (cond, gen_rtx_SET (target, source));
4708 return 1;
4709 }
4710
4711 if (remainder == 0xffffffff)
4712 {
4713 if (generate)
4714 emit_constant_insn (cond,
4715 gen_rtx_SET (target,
4716 gen_rtx_NOT (mode, source)));
4717 return 1;
4718 }
4719 final_invert = 1;
4720 break;
4721
4722 case MINUS:
4723 /* We treat MINUS as (val - source), since (source - val) is always
4724 passed as (source + (-val)). */
4725 if (remainder == 0)
4726 {
4727 if (generate)
4728 emit_constant_insn (cond,
4729 gen_rtx_SET (target,
4730 gen_rtx_NEG (mode, source)));
4731 return 1;
4732 }
4733 if (const_ok_for_arm (val))
4734 {
4735 if (generate)
4736 emit_constant_insn (cond,
4737 gen_rtx_SET (target,
4738 gen_rtx_MINUS (mode, GEN_INT (val),
4739 source)));
4740 return 1;
4741 }
4742
4743 break;
4744
4745 default:
4746 gcc_unreachable ();
4747 }
4748
4749 /* If we can do it in one insn get out quickly. */
4750 if (const_ok_for_op (val, code))
4751 {
4752 if (generate)
4753 emit_constant_insn (cond,
4754 gen_rtx_SET (target,
4755 (source
4756 ? gen_rtx_fmt_ee (code, mode, source,
4757 GEN_INT (val))
4758 : GEN_INT (val))));
4759 return 1;
4760 }
4761
4762 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4763 insn. */
4764 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4765 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4766 {
4767 if (generate)
4768 {
4769 if (mode == SImode && i == 16)
4770 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4771 smaller insn. */
4772 emit_constant_insn (cond,
4773 gen_zero_extendhisi2
4774 (target, gen_lowpart (HImode, source)));
4775 else
4776 /* Extz only supports SImode, but we can coerce the operands
4777 into that mode. */
4778 emit_constant_insn (cond,
4779 gen_extzv_t2 (gen_lowpart (SImode, target),
4780 gen_lowpart (SImode, source),
4781 GEN_INT (i), const0_rtx));
4782 }
4783
4784 return 1;
4785 }
4786
4787 /* Calculate a few attributes that may be useful for specific
4788 optimizations. */
4789 /* Count number of leading zeros. */
4790 for (i = 31; i >= 0; i--)
4791 {
4792 if ((remainder & (1 << i)) == 0)
4793 clear_sign_bit_copies++;
4794 else
4795 break;
4796 }
4797
4798 /* Count number of leading 1's. */
4799 for (i = 31; i >= 0; i--)
4800 {
4801 if ((remainder & (1 << i)) != 0)
4802 set_sign_bit_copies++;
4803 else
4804 break;
4805 }
4806
4807 /* Count number of trailing zeros. */
4808 for (i = 0; i <= 31; i++)
4809 {
4810 if ((remainder & (1 << i)) == 0)
4811 clear_zero_bit_copies++;
4812 else
4813 break;
4814 }
4815
4816 /* Count number of trailing 1's. */
4817 for (i = 0; i <= 31; i++)
4818 {
4819 if ((remainder & (1 << i)) != 0)
4820 set_zero_bit_copies++;
4821 else
4822 break;
4823 }
4824
4825 switch (code)
4826 {
4827 case SET:
4828 /* See if we can do this by sign-extending a constant that is known
4829 to be negative. This is a good way of doing it, since the shift
4830 may well merge into a subsequent insn. */
4831 if (set_sign_bit_copies > 1)
4832 {
4833 if (const_ok_for_arm
4834 (temp1 = ARM_SIGN_EXTEND (remainder
4835 << (set_sign_bit_copies - 1))))
4836 {
4837 if (generate)
4838 {
4839 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (new_src, GEN_INT (temp1)));
4842 emit_constant_insn (cond,
4843 gen_ashrsi3 (target, new_src,
4844 GEN_INT (set_sign_bit_copies - 1)));
4845 }
4846 return 2;
4847 }
4848 /* For an inverted constant, we will need to set the low bits;
4849 these will be shifted out of harm's way. */
4850 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4851 if (const_ok_for_arm (~temp1))
4852 {
4853 if (generate)
4854 {
4855 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4856 emit_constant_insn (cond,
4857 gen_rtx_SET (new_src, GEN_INT (temp1)));
4858 emit_constant_insn (cond,
4859 gen_ashrsi3 (target, new_src,
4860 GEN_INT (set_sign_bit_copies - 1)));
4861 }
4862 return 2;
4863 }
4864 }
4865
4866 /* See if we can calculate the value as the difference between two
4867 valid immediates. */
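/* A worked example (assuming MOVW is unavailable): 0x0000ffff is not a
   valid immediate, but 0x10000 and 1 both are, so below we get temp1 =
   0x10000 and temp2 = 1, and the value is built, in effect, as

     mov     rT, #0x10000
     sub     target, rT, #1

   (the add of -temp2 is emitted as a sub).  */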
4868 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4869 {
4870 int topshift = clear_sign_bit_copies & ~1;
4871
4872 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4873 & (0xff000000 >> topshift));
4874
4875 /* If temp1 is zero, then that means the 9 most significant
4876 bits of remainder were 1 and we've caused it to overflow.
4877 When topshift is 0 we don't need to do anything since we
4878 can borrow from 'bit 32'. */
4879 if (temp1 == 0 && topshift != 0)
4880 temp1 = 0x80000000 >> (topshift - 1);
4881
4882 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4883
4884 if (const_ok_for_arm (temp2))
4885 {
4886 if (generate)
4887 {
4888 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4889 emit_constant_insn (cond,
4890 gen_rtx_SET (new_src, GEN_INT (temp1)));
4891 emit_constant_insn (cond,
4892 gen_addsi3 (target, new_src,
4893 GEN_INT (-temp2)));
4894 }
4895
4896 return 2;
4897 }
4898 }
4899
4900 /* See if we can generate this by setting the bottom (or the top)
4901 16 bits, and then shifting these into the other half of the
4902 word. We only look for the simplest cases; to do more would cost
4903 too much. Be careful, however, not to generate this when the
4904 alternative would take fewer insns. */
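/* A worked example (ARM mode, assuming MOVW is unavailable): for
   0x01230123 the low half 0x123 is synthesized first (it needs two
   instructions on its own) and then OR-ed with itself shifted left by 16,
   giving three instructions in total, e.g.

     mov     rT, #0x100
     orr     rT, rT, #0x23
     orr     target, rT, rT, lsl #16

   (the exact two-instruction split of 0x123 may differ).  */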
4905 if (val & 0xffff0000)
4906 {
4907 temp1 = remainder & 0xffff0000;
4908 temp2 = remainder & 0x0000ffff;
4909
4910 /* Overlaps outside this range are best done using other methods. */
4911 for (i = 9; i < 24; i++)
4912 {
4913 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4914 && !const_ok_for_arm (temp2))
4915 {
4916 rtx new_src = (subtargets
4917 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4918 : target);
4919 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4920 source, subtargets, generate);
4921 source = new_src;
4922 if (generate)
4923 emit_constant_insn
4924 (cond,
4925 gen_rtx_SET
4926 (target,
4927 gen_rtx_IOR (mode,
4928 gen_rtx_ASHIFT (mode, source,
4929 GEN_INT (i)),
4930 source)));
4931 return insns + 1;
4932 }
4933 }
4934
4935 /* Don't duplicate cases already considered. */
4936 for (i = 17; i < 24; i++)
4937 {
4938 if (((temp1 | (temp1 >> i)) == remainder)
4939 && !const_ok_for_arm (temp1))
4940 {
4941 rtx new_src = (subtargets
4942 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4943 : target);
4944 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4945 source, subtargets, generate);
4946 source = new_src;
4947 if (generate)
4948 emit_constant_insn
4949 (cond,
4950 gen_rtx_SET (target,
4951 gen_rtx_IOR
4952 (mode,
4953 gen_rtx_LSHIFTRT (mode, source,
4954 GEN_INT (i)),
4955 source)));
4956 return insns + 1;
4957 }
4958 }
4959 }
4960 break;
4961
4962 case IOR:
4963 case XOR:
4964 /* If we have IOR or XOR, and the constant can be loaded in a
4965 single instruction, and we can find a temporary to put it in,
4966 then this can be done in two instructions instead of 3-4. */
4967 if (subtargets
4968 	      /* TARGET can't be NULL if SUBTARGETS is 0.  */
4969 || (reload_completed && !reg_mentioned_p (target, source)))
4970 {
4971 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4972 {
4973 if (generate)
4974 {
4975 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4976
4977 emit_constant_insn (cond,
4978 gen_rtx_SET (sub, GEN_INT (val)));
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 gen_rtx_fmt_ee (code, mode,
4982 source, sub)));
4983 }
4984 return 2;
4985 }
4986 }
4987
4988 if (code == XOR)
4989 break;
4990
4991 	  /* Convert
4992 	     x = y | constant (composed of set_sign_bit_copies leading 1s
4993 	     followed by 0s, e.g. 0xfff00000) into
4994 	     x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4995
4996 This can be done in 2 instructions by using shifts with mov or mvn.
4997 e.g. for
4998 x = x | 0xfff00000;
4999 	     we generate:
5000 mvn r0, r0, asl #12
5001 mvn r0, r0, lsr #12 */
5002 if (set_sign_bit_copies > 8
5003 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5004 {
5005 if (generate)
5006 {
5007 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5008 rtx shift = GEN_INT (set_sign_bit_copies);
5009
5010 emit_constant_insn
5011 (cond,
5012 gen_rtx_SET (sub,
5013 gen_rtx_NOT (mode,
5014 gen_rtx_ASHIFT (mode,
5015 source,
5016 shift))));
5017 emit_constant_insn
5018 (cond,
5019 gen_rtx_SET (target,
5020 gen_rtx_NOT (mode,
5021 gen_rtx_LSHIFTRT (mode, sub,
5022 shift))));
5023 }
5024 return 2;
5025 }
5026
5027 	  /* Convert
5028 	     x = y | constant (which has set_zero_bit_copies trailing ones)
5029 	     to
5030 	     x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5031 
5032 	     E.g. for r0 = r0 | 0xfff we generate:
5033 	     mvn	r0, r0, lsr #12
5034 	     mvn	r0, r0, asl #12
5035 
5036 	  */
5037 if (set_zero_bit_copies > 8
5038 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5039 {
5040 if (generate)
5041 {
5042 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (set_zero_bit_copies);
5044
5045 emit_constant_insn
5046 (cond,
5047 gen_rtx_SET (sub,
5048 gen_rtx_NOT (mode,
5049 gen_rtx_LSHIFTRT (mode,
5050 source,
5051 shift))));
5052 emit_constant_insn
5053 (cond,
5054 gen_rtx_SET (target,
5055 gen_rtx_NOT (mode,
5056 gen_rtx_ASHIFT (mode, sub,
5057 shift))));
5058 }
5059 return 2;
5060 }
5061
5062 /* This will never be reached for Thumb2 because orn is a valid
5063 instruction. This is for Thumb1 and the ARM 32 bit cases.
5064
5065 x = y | constant (such that ~constant is a valid constant)
5066 Transform this to
5067 x = ~(~y & ~constant).
5068 */
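	  /* For instance, when no scratch register is available,
	     x = y | 0xfffff0ff (whose inverse 0xf00 is a valid immediate)
	     becomes roughly:
		mvn	x, y
		and	x, x, #0xf00
		mvn	x, x  */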
5069 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5070 {
5071 if (generate)
5072 {
5073 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5074 emit_constant_insn (cond,
5075 gen_rtx_SET (sub,
5076 gen_rtx_NOT (mode, source)));
5077 source = sub;
5078 if (subtargets)
5079 sub = gen_reg_rtx (mode);
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (sub,
5082 gen_rtx_AND (mode, source,
5083 GEN_INT (temp1))));
5084 emit_constant_insn (cond,
5085 gen_rtx_SET (target,
5086 gen_rtx_NOT (mode, sub)));
5087 }
5088 return 3;
5089 }
5090 break;
5091
5092 case AND:
5093 	      /* See if two shifts will do 2 or more insns' worth of work.  */
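	  /* E.g. for x = y & 0x0000ffff (clear_sign_bit_copies == 16) the code
	     below emits roughly
		mov	tmp, y, asl #16
		mov	x, tmp, lsr #16
	     and the clear_zero_bit_copies case further down uses lsr followed
	     by asl instead.  */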
5094 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5095 {
5096 HOST_WIDE_INT shift_mask = ((0xffffffff
5097 << (32 - clear_sign_bit_copies))
5098 & 0xffffffff);
5099
5100 if ((remainder | shift_mask) != 0xffffffff)
5101 {
5102 HOST_WIDE_INT new_val
5103 = ARM_SIGN_EXTEND (remainder | shift_mask);
5104
5105 if (generate)
5106 {
5107 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5108 insns = arm_gen_constant (AND, SImode, cond, new_val,
5109 new_src, source, subtargets, 1);
5110 source = new_src;
5111 }
5112 else
5113 {
5114 rtx targ = subtargets ? NULL_RTX : target;
5115 insns = arm_gen_constant (AND, mode, cond, new_val,
5116 targ, source, subtargets, 0);
5117 }
5118 }
5119
5120 if (generate)
5121 {
5122 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5123 rtx shift = GEN_INT (clear_sign_bit_copies);
5124
5125 emit_insn (gen_ashlsi3 (new_src, source, shift));
5126 emit_insn (gen_lshrsi3 (target, new_src, shift));
5127 }
5128
5129 return insns + 2;
5130 }
5131
5132 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5133 {
5134 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5135
5136 if ((remainder | shift_mask) != 0xffffffff)
5137 {
5138 HOST_WIDE_INT new_val
5139 = ARM_SIGN_EXTEND (remainder | shift_mask);
5140 if (generate)
5141 {
5142 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5143
5144 insns = arm_gen_constant (AND, mode, cond, new_val,
5145 new_src, source, subtargets, 1);
5146 source = new_src;
5147 }
5148 else
5149 {
5150 rtx targ = subtargets ? NULL_RTX : target;
5151
5152 insns = arm_gen_constant (AND, mode, cond, new_val,
5153 targ, source, subtargets, 0);
5154 }
5155 }
5156
5157 if (generate)
5158 {
5159 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5160 rtx shift = GEN_INT (clear_zero_bit_copies);
5161
5162 emit_insn (gen_lshrsi3 (new_src, source, shift));
5163 emit_insn (gen_ashlsi3 (target, new_src, shift));
5164 }
5165
5166 return insns + 2;
5167 }
5168
5169 break;
5170
5171 default:
5172 break;
5173 }
5174
5175 /* Calculate what the instruction sequences would be if we generated it
5176 normally, negated, or inverted. */
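  /* For instance, for code == PLUS and the constant 0xfffffffe (-2), the
     negated sequence below is preferred: a single sub of #2 rather than a
     longer sequence of adds.  */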
5177 if (code == AND)
5178 /* AND cannot be split into multiple insns, so invert and use BIC. */
5179 insns = 99;
5180 else
5181 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5182
5183 if (can_negate)
5184 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5185 &neg_immediates);
5186 else
5187 neg_insns = 99;
5188
5189 if (can_invert || final_invert)
5190 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5191 &inv_immediates);
5192 else
5193 inv_insns = 99;
5194
5195 immediates = &pos_immediates;
5196
5197 /* Is the negated immediate sequence more efficient? */
5198 if (neg_insns < insns && neg_insns <= inv_insns)
5199 {
5200 insns = neg_insns;
5201 immediates = &neg_immediates;
5202 }
5203 else
5204 can_negate = 0;
5205
5206 /* Is the inverted immediate sequence more efficient?
5207 We must allow for an extra NOT instruction for XOR operations, although
5208 there is some chance that the final 'mvn' will get optimized later. */
5209 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5210 {
5211 insns = inv_insns;
5212 immediates = &inv_immediates;
5213 }
5214 else
5215 {
5216 can_invert = 0;
5217 final_invert = 0;
5218 }
5219
5220 /* Now output the chosen sequence as instructions. */
5221 if (generate)
5222 {
5223 for (i = 0; i < insns; i++)
5224 {
5225 rtx new_src, temp1_rtx;
5226
5227 temp1 = immediates->i[i];
5228
5229 if (code == SET || code == MINUS)
5230 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5231 else if ((final_invert || i < (insns - 1)) && subtargets)
5232 new_src = gen_reg_rtx (mode);
5233 else
5234 new_src = target;
5235
5236 if (can_invert)
5237 temp1 = ~temp1;
5238 else if (can_negate)
5239 temp1 = -temp1;
5240
5241 temp1 = trunc_int_for_mode (temp1, mode);
5242 temp1_rtx = GEN_INT (temp1);
5243
5244 if (code == SET)
5245 ;
5246 else if (code == MINUS)
5247 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5248 else
5249 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5250
5251 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5252 source = new_src;
5253
5254 if (code == SET)
5255 {
5256 can_negate = can_invert;
5257 can_invert = 0;
5258 code = PLUS;
5259 }
5260 else if (code == MINUS)
5261 code = PLUS;
5262 }
5263 }
5264
5265 if (final_invert)
5266 {
5267 if (generate)
5268 emit_constant_insn (cond, gen_rtx_SET (target,
5269 gen_rtx_NOT (mode, source)));
5270 insns++;
5271 }
5272
5273 return insns;
5274 }
5275
5276 /* Canonicalize a comparison so that we are more likely to recognize it.
5277 This can be done for a few constant compares, where we can make the
5278 immediate value easier to load. */
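/* For example, an SImode comparison (GT x 4095) is rewritten here as
   (GE x 4096), since 4096 is a valid immediate operand while 4095 is not.  */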
5279
5280 static void
5281 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5282 bool op0_preserve_value)
5283 {
5284 machine_mode mode;
5285 unsigned HOST_WIDE_INT i, maxval;
5286
5287 mode = GET_MODE (*op0);
5288 if (mode == VOIDmode)
5289 mode = GET_MODE (*op1);
5290
5291 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5292
5293 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5294 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5295 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5296 for GTU/LEU in Thumb mode. */
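  /* For instance, in Thumb mode a DImode (GTU x 5) would be adjusted below
     to (GEU x 6), since both halves of the constant 6 are valid
     immediates.  */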
5297 if (mode == DImode)
5298 {
5300 if (*code == GT || *code == LE
5301 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5302 {
5303 /* Missing comparison. First try to use an available
5304 comparison. */
5305 if (CONST_INT_P (*op1))
5306 {
5307 i = INTVAL (*op1);
5308 switch (*code)
5309 {
5310 case GT:
5311 case LE:
5312 if (i != maxval
5313 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5314 {
5315 *op1 = GEN_INT (i + 1);
5316 *code = *code == GT ? GE : LT;
5317 return;
5318 }
5319 break;
5320 case GTU:
5321 case LEU:
5322 if (i != ~((unsigned HOST_WIDE_INT) 0)
5323 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5324 {
5325 *op1 = GEN_INT (i + 1);
5326 *code = *code == GTU ? GEU : LTU;
5327 return;
5328 }
5329 break;
5330 default:
5331 gcc_unreachable ();
5332 }
5333 }
5334
5335 /* If that did not work, reverse the condition. */
5336 if (!op0_preserve_value)
5337 {
5338 std::swap (*op0, *op1);
5339 *code = (int)swap_condition ((enum rtx_code)*code);
5340 }
5341 }
5342 return;
5343 }
5344
5345 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5346 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5347 to facilitate possible combining with a cmp into 'ands'. */
5348 if (mode == SImode
5349 && GET_CODE (*op0) == ZERO_EXTEND
5350 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5351 && GET_MODE (XEXP (*op0, 0)) == QImode
5352 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5353 && subreg_lowpart_p (XEXP (*op0, 0))
5354 && *op1 == const0_rtx)
5355 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5356 GEN_INT (255));
5357
5358 /* Comparisons smaller than DImode. Only adjust comparisons against
5359 an out-of-range constant. */
5360 if (!CONST_INT_P (*op1)
5361 || const_ok_for_arm (INTVAL (*op1))
5362 || const_ok_for_arm (- INTVAL (*op1)))
5363 return;
5364
5365 i = INTVAL (*op1);
5366
5367 switch (*code)
5368 {
5369 case EQ:
5370 case NE:
5371 return;
5372
5373 case GT:
5374 case LE:
5375 if (i != maxval
5376 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5377 {
5378 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5379 *code = *code == GT ? GE : LT;
5380 return;
5381 }
5382 break;
5383
5384 case GE:
5385 case LT:
5386 if (i != ~maxval
5387 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5388 {
5389 *op1 = GEN_INT (i - 1);
5390 *code = *code == GE ? GT : LE;
5391 return;
5392 }
5393 break;
5394
5395 case GTU:
5396 case LEU:
5397 if (i != ~((unsigned HOST_WIDE_INT) 0)
5398 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5399 {
5400 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5401 *code = *code == GTU ? GEU : LTU;
5402 return;
5403 }
5404 break;
5405
5406 case GEU:
5407 case LTU:
5408 if (i != 0
5409 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5410 {
5411 *op1 = GEN_INT (i - 1);
5412 *code = *code == GEU ? GTU : LEU;
5413 return;
5414 }
5415 break;
5416
5417 default:
5418 gcc_unreachable ();
5419 }
5420 }
5421
5422
5423 /* Define how to find the value returned by a function. */
5424
5425 static rtx
5426 arm_function_value(const_tree type, const_tree func,
5427 bool outgoing ATTRIBUTE_UNUSED)
5428 {
5429 machine_mode mode;
5430 int unsignedp ATTRIBUTE_UNUSED;
5431 rtx r ATTRIBUTE_UNUSED;
5432
5433 mode = TYPE_MODE (type);
5434
5435 if (TARGET_AAPCS_BASED)
5436 return aapcs_allocate_return_reg (mode, type, func);
5437
5438 /* Promote integer types. */
5439 if (INTEGRAL_TYPE_P (type))
5440 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5441
5442 	  /* Promote small structs returned in a register to full-word size
5443 for big-endian AAPCS. */
5444 if (arm_return_in_msb (type))
5445 {
5446 HOST_WIDE_INT size = int_size_in_bytes (type);
5447 if (size % UNITS_PER_WORD != 0)
5448 {
5449 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5450 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5451 }
5452 }
5453
5454 return arm_libcall_value_1 (mode);
5455 }
5456
5457 /* libcall hashtable helpers. */
5458
5459 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5460 {
5461 static inline hashval_t hash (const rtx_def *);
5462 static inline bool equal (const rtx_def *, const rtx_def *);
5463 static inline void remove (rtx_def *);
5464 };
5465
5466 inline bool
5467 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5468 {
5469 return rtx_equal_p (p1, p2);
5470 }
5471
5472 inline hashval_t
5473 libcall_hasher::hash (const rtx_def *p1)
5474 {
5475 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5476 }
5477
5478 typedef hash_table<libcall_hasher> libcall_table_type;
5479
5480 static void
5481 add_libcall (libcall_table_type *htab, rtx libcall)
5482 {
5483 *htab->find_slot (libcall, INSERT) = libcall;
5484 }
5485
5486 static bool
5487 arm_libcall_uses_aapcs_base (const_rtx libcall)
5488 {
5489 static bool init_done = false;
5490 static libcall_table_type *libcall_htab = NULL;
5491
5492 if (!init_done)
5493 {
5494 init_done = true;
5495
5496 libcall_htab = new libcall_table_type (31);
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5505
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5514
5515 add_libcall (libcall_htab,
5516 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5517 add_libcall (libcall_htab,
5518 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5519 add_libcall (libcall_htab,
5520 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5521 add_libcall (libcall_htab,
5522 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5523 add_libcall (libcall_htab,
5524 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5525 add_libcall (libcall_htab,
5526 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5527 add_libcall (libcall_htab,
5528 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5529 add_libcall (libcall_htab,
5530 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5531
5532 /* Values from double-precision helper functions are returned in core
5533 registers if the selected core only supports single-precision
5534 arithmetic, even if we are using the hard-float ABI. The same is
5535 true for single-precision helpers, but we will never be using the
5536 hard-float ABI on a CPU which doesn't support single-precision
5537 operations in hardware. */
5538 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5539 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5540 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5541 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5542 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5543 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5544 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5545 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5546 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5547 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5548 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5549 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5550 SFmode));
5551 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5552 DFmode));
5553 add_libcall (libcall_htab,
5554 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5555 }
5556
5557 return libcall && libcall_htab->find (libcall) != NULL;
5558 }
5559
5560 static rtx
5561 arm_libcall_value_1 (machine_mode mode)
5562 {
5563 if (TARGET_AAPCS_BASED)
5564 return aapcs_libcall_value (mode);
5565 else if (TARGET_IWMMXT_ABI
5566 && arm_vector_mode_supported_p (mode))
5567 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5568 else
5569 return gen_rtx_REG (mode, ARG_REGISTER (1));
5570 }
5571
5572 /* Define how to find the value returned by a library function
5573 assuming the value has mode MODE. */
5574
5575 static rtx
5576 arm_libcall_value (machine_mode mode, const_rtx libcall)
5577 {
5578 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5579 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5580 {
5581 /* The following libcalls return their result in integer registers,
5582 even though they return a floating point value. */
5583 if (arm_libcall_uses_aapcs_base (libcall))
5584 		return gen_rtx_REG (mode, ARG_REGISTER (1));
5586 	    }
5587
5588 return arm_libcall_value_1 (mode);
5589 }
5590
5591 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5592
5593 static bool
5594 arm_function_value_regno_p (const unsigned int regno)
5595 {
5596 if (regno == ARG_REGISTER (1)
5597 || (TARGET_32BIT
5598 && TARGET_AAPCS_BASED
5599 && TARGET_HARD_FLOAT
5600 && regno == FIRST_VFP_REGNUM)
5601 || (TARGET_IWMMXT_ABI
5602 && regno == FIRST_IWMMXT_REGNUM))
5603 return true;
5604
5605 return false;
5606 }
5607
5608 /* Determine the amount of memory needed to store the possible return
5609 registers of an untyped call. */
5610 int
5611 arm_apply_result_size (void)
5612 {
5613 int size = 16;
5614
5615 if (TARGET_32BIT)
5616 {
5617 if (TARGET_HARD_FLOAT_ABI)
5618 size += 32;
5619 if (TARGET_IWMMXT_ABI)
5620 size += 8;
5621 }
5622
5623 return size;
5624 }
5625
5626 /* Decide whether TYPE should be returned in memory (true)
5627 or in a register (false). FNTYPE is the type of the function making
5628 the call. */
5629 static bool
5630 arm_return_in_memory (const_tree type, const_tree fntype)
5631 {
5632 HOST_WIDE_INT size;
5633
5634 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5635
5636 if (TARGET_AAPCS_BASED)
5637 {
5638 	      /* Simple, non-aggregate types (i.e. not including vectors and
5639 complex) are always returned in a register (or registers).
5640 We don't care about which register here, so we can short-cut
5641 some of the detail. */
5642 if (!AGGREGATE_TYPE_P (type)
5643 && TREE_CODE (type) != VECTOR_TYPE
5644 && TREE_CODE (type) != COMPLEX_TYPE)
5645 return false;
5646
5647 /* Any return value that is no larger than one word can be
5648 returned in r0. */
5649 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5650 return false;
5651
5652 /* Check any available co-processors to see if they accept the
5653 type as a register candidate (VFP, for example, can return
5654 some aggregates in consecutive registers). These aren't
5655 available if the call is variadic. */
5656 if (aapcs_select_return_coproc (type, fntype) >= 0)
5657 return false;
5658
5659 /* Vector values should be returned using ARM registers, not
5660 memory (unless they're over 16 bytes, which will break since
5661 we only have four call-clobbered registers to play with). */
5662 if (TREE_CODE (type) == VECTOR_TYPE)
5663 return (size < 0 || size > (4 * UNITS_PER_WORD));
5664
5665 /* The rest go in memory. */
5666 return true;
5667 }
5668
5669 if (TREE_CODE (type) == VECTOR_TYPE)
5670 return (size < 0 || size > (4 * UNITS_PER_WORD));
5671
5672 	  if (!AGGREGATE_TYPE_P (type)
5673 	      && (TREE_CODE (type) != VECTOR_TYPE))
5674 /* All simple types are returned in registers. */
5675 return false;
5676
5677 if (arm_abi != ARM_ABI_APCS)
5678 {
5679 /* ATPCS and later return aggregate types in memory only if they are
5680 larger than a word (or are variable size). */
5681 return (size < 0 || size > UNITS_PER_WORD);
5682 }
5683
5684 /* For the arm-wince targets we choose to be compatible with Microsoft's
5685 ARM and Thumb compilers, which always return aggregates in memory. */
5686 #ifndef ARM_WINCE
5687 /* All structures/unions bigger than one word are returned in memory.
5688 Also catch the case where int_size_in_bytes returns -1. In this case
5689 the aggregate is either huge or of variable size, and in either case
5690 we will want to return it via memory and not in a register. */
5691 if (size < 0 || size > UNITS_PER_WORD)
5692 return true;
5693
5694 if (TREE_CODE (type) == RECORD_TYPE)
5695 {
5696 tree field;
5697
5698 /* For a struct the APCS says that we only return in a register
5699 if the type is 'integer like' and every addressable element
5700 has an offset of zero. For practical purposes this means
5701 that the structure can have at most one non bit-field element
5702 and that this element must be the first one in the structure. */
5703
5704 /* Find the first field, ignoring non FIELD_DECL things which will
5705 have been created by C++. */
5706 for (field = TYPE_FIELDS (type);
5707 field && TREE_CODE (field) != FIELD_DECL;
5708 field = DECL_CHAIN (field))
5709 continue;
5710
5711 if (field == NULL)
5712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5713
5714 /* Check that the first field is valid for returning in a register. */
5715
5716 /* ... Floats are not allowed */
5717 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5718 return true;
5719
5720 /* ... Aggregates that are not themselves valid for returning in
5721 a register are not allowed. */
5722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5723 return true;
5724
5725 /* Now check the remaining fields, if any. Only bitfields are allowed,
5726 since they are not addressable. */
5727 for (field = DECL_CHAIN (field);
5728 field;
5729 field = DECL_CHAIN (field))
5730 {
5731 if (TREE_CODE (field) != FIELD_DECL)
5732 continue;
5733
5734 if (!DECL_BIT_FIELD_TYPE (field))
5735 return true;
5736 }
5737
5738 return false;
5739 }
5740
5741 if (TREE_CODE (type) == UNION_TYPE)
5742 {
5743 tree field;
5744
5745 /* Unions can be returned in registers if every element is
5746 integral, or can be returned in an integer register. */
5747 for (field = TYPE_FIELDS (type);
5748 field;
5749 field = DECL_CHAIN (field))
5750 {
5751 if (TREE_CODE (field) != FIELD_DECL)
5752 continue;
5753
5754 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5755 return true;
5756
5757 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5758 return true;
5759 }
5760
5761 return false;
5762 }
5763 #endif /* not ARM_WINCE */
5764
5765 /* Return all other types in memory. */
5766 return true;
5767 }
5768
5769 const struct pcs_attribute_arg
5770 {
5771 const char *arg;
5772 enum arm_pcs value;
5773 } pcs_attribute_args[] =
5774 {
5775 {"aapcs", ARM_PCS_AAPCS},
5776 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5777 #if 0
5778 /* We could recognize these, but changes would be needed elsewhere
5779 	   to implement them.  */
5780 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5781 {"atpcs", ARM_PCS_ATPCS},
5782 {"apcs", ARM_PCS_APCS},
5783 #endif
5784 {NULL, ARM_PCS_UNKNOWN}
5785 };
5786
5787 static enum arm_pcs
5788 arm_pcs_from_attribute (tree attr)
5789 {
5790 const struct pcs_attribute_arg *ptr;
5791 const char *arg;
5792
5793 /* Get the value of the argument. */
5794 if (TREE_VALUE (attr) == NULL_TREE
5795 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5796 return ARM_PCS_UNKNOWN;
5797
5798 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5799
5800 /* Check it against the list of known arguments. */
5801 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5802 if (streq (arg, ptr->arg))
5803 return ptr->value;
5804
5805 	  /* An unrecognized PCS variant.  */
5806 return ARM_PCS_UNKNOWN;
5807 }
5808
5809 /* Get the PCS variant to use for this call. TYPE is the function's type
5810 	   specification, DECL is the specific declaration.  DECL may be null if
5811 the call could be indirect or if this is a library call. */
5812 static enum arm_pcs
5813 arm_get_pcs_model (const_tree type, const_tree decl)
5814 {
5815 bool user_convention = false;
5816 enum arm_pcs user_pcs = arm_pcs_default;
5817 tree attr;
5818
5819 gcc_assert (type);
5820
5821 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5822 if (attr)
5823 {
5824 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5825 user_convention = true;
5826 }
5827
5828 if (TARGET_AAPCS_BASED)
5829 {
5830 /* Detect varargs functions. These always use the base rules
5831 (no argument is ever a candidate for a co-processor
5832 register). */
5833 bool base_rules = stdarg_p (type);
5834
5835 if (user_convention)
5836 {
5837 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5838 sorry ("non-AAPCS derived PCS variant");
5839 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5840 error ("variadic functions must use the base AAPCS variant");
5841 }
5842
5843 if (base_rules)
5844 return ARM_PCS_AAPCS;
5845 else if (user_convention)
5846 return user_pcs;
5847 else if (decl && flag_unit_at_a_time)
5848 {
5849 /* Local functions never leak outside this compilation unit,
5850 so we are free to use whatever conventions are
5851 appropriate. */
5852 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5853 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5854 if (i && i->local)
5855 return ARM_PCS_AAPCS_LOCAL;
5856 }
5857 }
5858 else if (user_convention && user_pcs != arm_pcs_default)
5859 sorry ("PCS variant");
5860
5861 /* For everything else we use the target's default. */
5862 return arm_pcs_default;
5863 }
5864
5865
5866 static void
5867 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5868 const_tree fntype ATTRIBUTE_UNUSED,
5869 rtx libcall ATTRIBUTE_UNUSED,
5870 const_tree fndecl ATTRIBUTE_UNUSED)
5871 {
5872 /* Record the unallocated VFP registers. */
5873 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5874 pcum->aapcs_vfp_reg_alloc = 0;
5875 }
5876
5877 /* Walk down the type tree of TYPE counting consecutive base elements.
5878 If *MODEP is VOIDmode, then set it to the first valid floating point
5879 type. If a non-floating point type is found, or if a floating point
5880 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5881 otherwise return the count in the sub-tree. */
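/* For example, a C struct containing two 'double' members yields a count
   of 2 with *MODEP set to DFmode, while a struct mixing 'float' and
   'double' members yields -1.  */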
5882 static int
5883 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5884 {
5885 machine_mode mode;
5886 HOST_WIDE_INT size;
5887
5888 switch (TREE_CODE (type))
5889 {
5890 case REAL_TYPE:
5891 mode = TYPE_MODE (type);
5892 if (mode != DFmode && mode != SFmode && mode != HFmode)
5893 return -1;
5894
5895 if (*modep == VOIDmode)
5896 *modep = mode;
5897
5898 if (*modep == mode)
5899 return 1;
5900
5901 break;
5902
5903 case COMPLEX_TYPE:
5904 mode = TYPE_MODE (TREE_TYPE (type));
5905 if (mode != DFmode && mode != SFmode)
5906 return -1;
5907
5908 if (*modep == VOIDmode)
5909 *modep = mode;
5910
5911 if (*modep == mode)
5912 return 2;
5913
5914 break;
5915
5916 case VECTOR_TYPE:
5917 /* Use V2SImode and V4SImode as representatives of all 64-bit
5918 and 128-bit vector types, whether or not those modes are
5919 supported with the present options. */
5920 size = int_size_in_bytes (type);
5921 switch (size)
5922 {
5923 case 8:
5924 mode = V2SImode;
5925 break;
5926 case 16:
5927 mode = V4SImode;
5928 break;
5929 default:
5930 return -1;
5931 }
5932
5933 if (*modep == VOIDmode)
5934 *modep = mode;
5935
5936 /* Vector modes are considered to be opaque: two vectors are
5937 equivalent for the purposes of being homogeneous aggregates
5938 if they are the same size. */
5939 if (*modep == mode)
5940 return 1;
5941
5942 break;
5943
5944 case ARRAY_TYPE:
5945 {
5946 int count;
5947 tree index = TYPE_DOMAIN (type);
5948
5949 /* Can't handle incomplete types nor sizes that are not
5950 fixed. */
5951 if (!COMPLETE_TYPE_P (type)
5952 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5953 return -1;
5954
5955 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5956 if (count == -1
5957 || !index
5958 || !TYPE_MAX_VALUE (index)
5959 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5960 || !TYPE_MIN_VALUE (index)
5961 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5962 || count < 0)
5963 return -1;
5964
5965 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5966 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5967
5968 /* There must be no padding. */
5969 if (wi::to_wide (TYPE_SIZE (type))
5970 != count * GET_MODE_BITSIZE (*modep))
5971 return -1;
5972
5973 return count;
5974 }
5975
5976 case RECORD_TYPE:
5977 {
5978 int count = 0;
5979 int sub_count;
5980 tree field;
5981
5982 /* Can't handle incomplete types nor sizes that are not
5983 fixed. */
5984 if (!COMPLETE_TYPE_P (type)
5985 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5986 return -1;
5987
5988 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5989 {
5990 if (TREE_CODE (field) != FIELD_DECL)
5991 continue;
5992
5993 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5994 if (sub_count < 0)
5995 return -1;
5996 count += sub_count;
5997 }
5998
5999 /* There must be no padding. */
6000 if (wi::to_wide (TYPE_SIZE (type))
6001 != count * GET_MODE_BITSIZE (*modep))
6002 return -1;
6003
6004 return count;
6005 }
6006
6007 case UNION_TYPE:
6008 case QUAL_UNION_TYPE:
6009 {
6010 /* These aren't very interesting except in a degenerate case. */
6011 int count = 0;
6012 int sub_count;
6013 tree field;
6014
6015 /* Can't handle incomplete types nor sizes that are not
6016 fixed. */
6017 if (!COMPLETE_TYPE_P (type)
6018 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6019 return -1;
6020
6021 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6022 {
6023 if (TREE_CODE (field) != FIELD_DECL)
6024 continue;
6025
6026 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6027 if (sub_count < 0)
6028 return -1;
6029 count = count > sub_count ? count : sub_count;
6030 }
6031
6032 /* There must be no padding. */
6033 if (wi::to_wide (TYPE_SIZE (type))
6034 != count * GET_MODE_BITSIZE (*modep))
6035 return -1;
6036
6037 return count;
6038 }
6039
6040 default:
6041 break;
6042 }
6043
6044 return -1;
6045 }
6046
6047 /* Return true if PCS_VARIANT should use VFP registers. */
6048 static bool
6049 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6050 {
6051 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6052 {
6053 static bool seen_thumb1_vfp = false;
6054
6055 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6056 {
6057 sorry ("Thumb-1 hard-float VFP ABI");
6058 /* sorry() is not immediately fatal, so only display this once. */
6059 seen_thumb1_vfp = true;
6060 }
6061
6062 return true;
6063 }
6064
6065 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6066 return false;
6067
6068 	  return (TARGET_32BIT && TARGET_HARD_FLOAT
6069 		  && (TARGET_VFP_DOUBLE || !is_double));
6070 }
6071
6072 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6073 suitable for passing or returning in VFP registers for the PCS
6074 variant selected. If it is, then *BASE_MODE is updated to contain
6075 a machine mode describing each element of the argument's type and
6076 *COUNT to hold the number of such elements. */
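/* For example, a homogeneous aggregate of four 'float' members is a
   candidate here (*BASE_MODE == SFmode, *COUNT == 4), whereas one with
   five such members exceeds the four-element limit and is rejected.  */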
6077 static bool
6078 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6079 machine_mode mode, const_tree type,
6080 machine_mode *base_mode, int *count)
6081 {
6082 machine_mode new_mode = VOIDmode;
6083
6084 /* If we have the type information, prefer that to working things
6085 out from the mode. */
6086 if (type)
6087 {
6088 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6089
6090 if (ag_count > 0 && ag_count <= 4)
6091 *count = ag_count;
6092 else
6093 return false;
6094 }
6095 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6096 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6097 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6098 {
6099 *count = 1;
6100 new_mode = mode;
6101 }
6102 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6103 {
6104 *count = 2;
6105 new_mode = (mode == DCmode ? DFmode : SFmode);
6106 }
6107 else
6108 return false;
6109
6110 
6112 return false;
6113
6114 *base_mode = new_mode;
6115
6116 if (TARGET_GENERAL_REGS_ONLY)
6117 	    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6118 type);
6119
6120 return true;
6121 }
6122
6123 static bool
6124 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6125 machine_mode mode, const_tree type)
6126 {
6127 int count ATTRIBUTE_UNUSED;
6128 machine_mode ag_mode ATTRIBUTE_UNUSED;
6129
6130 if (!use_vfp_abi (pcs_variant, false))
6131 return false;
6132 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6133 &ag_mode, &count);
6134 }
6135
6136 static bool
6137 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6138 const_tree type)
6139 {
6140 if (!use_vfp_abi (pcum->pcs_variant, false))
6141 return false;
6142
6143 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6144 &pcum->aapcs_vfp_rmode,
6145 &pcum->aapcs_vfp_rcount);
6146 }
6147
6148 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6149 for the behaviour of this function. */
6150
6151 static bool
6152 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6153 const_tree type ATTRIBUTE_UNUSED)
6154 {
6155 int rmode_size
6156 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6157 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6158 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6159 int regno;
6160
6161 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6162 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6163 {
6164 pcum->aapcs_vfp_reg_alloc = mask << regno;
6165 if (mode == BLKmode
6166 || (mode == TImode && ! TARGET_NEON)
6167 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6168 {
6169 int i;
6170 int rcount = pcum->aapcs_vfp_rcount;
6171 int rshift = shift;
6172 machine_mode rmode = pcum->aapcs_vfp_rmode;
6173 rtx par;
6174 if (!TARGET_NEON)
6175 {
6176 /* Avoid using unsupported vector modes. */
6177 if (rmode == V2SImode)
6178 rmode = DImode;
6179 else if (rmode == V4SImode)
6180 {
6181 rmode = DImode;
6182 rcount *= 2;
6183 rshift /= 2;
6184 }
6185 }
6186 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6187 for (i = 0; i < rcount; i++)
6188 {
6189 rtx tmp = gen_rtx_REG (rmode,
6190 FIRST_VFP_REGNUM + regno + i * rshift);
6191 tmp = gen_rtx_EXPR_LIST
6192 (VOIDmode, tmp,
6193 GEN_INT (i * GET_MODE_SIZE (rmode)));
6194 XVECEXP (par, 0, i) = tmp;
6195 }
6196
6197 pcum->aapcs_reg = par;
6198 }
6199 else
6200 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6201 return true;
6202 }
6203 return false;
6204 }
6205
6206 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6207 comment there for the behaviour of this function. */
6208
6209 static rtx
6210 	aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6211 machine_mode mode,
6212 const_tree type ATTRIBUTE_UNUSED)
6213 {
6214 if (!use_vfp_abi (pcs_variant, false))
6215 return NULL;
6216
6217 if (mode == BLKmode
6218 || (GET_MODE_CLASS (mode) == MODE_INT
6219 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6220 && !TARGET_NEON))
6221 {
6222 int count;
6223 machine_mode ag_mode;
6224 int i;
6225 rtx par;
6226 int shift;
6227
6228 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6229 &ag_mode, &count);
6230
6231 if (!TARGET_NEON)
6232 {
6233 if (ag_mode == V2SImode)
6234 ag_mode = DImode;
6235 else if (ag_mode == V4SImode)
6236 {
6237 ag_mode = DImode;
6238 count *= 2;
6239 }
6240 }
6241 	      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6242 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6243 for (i = 0; i < count; i++)
6244 {
6245 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6246 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6247 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6248 XVECEXP (par, 0, i) = tmp;
6249 }
6250
6251 return par;
6252 }
6253
6254 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6255 }
6256
6257 static void
6258 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6259 machine_mode mode ATTRIBUTE_UNUSED,
6260 const_tree type ATTRIBUTE_UNUSED)
6261 {
6262 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6263 pcum->aapcs_vfp_reg_alloc = 0;
6264 return;
6265 }
6266
6267 #define AAPCS_CP(X) \
6268 { \
6269 aapcs_ ## X ## _cum_init, \
6270 aapcs_ ## X ## _is_call_candidate, \
6271 aapcs_ ## X ## _allocate, \
6272 aapcs_ ## X ## _is_return_candidate, \
6273 aapcs_ ## X ## _allocate_return_reg, \
6274 aapcs_ ## X ## _advance \
6275 }
6276
6277 /* Table of co-processors that can be used to pass arguments in
6278 	   registers.  Ideally no argument should be a candidate for more than
6279 one co-processor table entry, but the table is processed in order
6280 and stops after the first match. If that entry then fails to put
6281 the argument into a co-processor register, the argument will go on
6282 the stack. */
6283 static struct
6284 {
6285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6286 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6287
6288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6289 BLKmode) is a candidate for this co-processor's registers; this
6290 function should ignore any position-dependent state in
6291 CUMULATIVE_ARGS and only use call-type dependent information. */
6292 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6293
6294 /* Return true if the argument does get a co-processor register; it
6295 should set aapcs_reg to an RTX of the register allocated as is
6296 required for a return from FUNCTION_ARG. */
6297 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6298
6299 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6300 be returned in this co-processor's registers. */
6301 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6302
6303 /* Allocate and return an RTX element to hold the return type of a call. This
6304 routine must not fail and will only be called if is_return_candidate
6305 returned true with the same parameters. */
6306 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6307
6308 /* Finish processing this argument and prepare to start processing
6309 the next one. */
6310 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6311 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6312 {
6313 AAPCS_CP(vfp)
6314 };
6315
6316 #undef AAPCS_CP
6317
6318 static int
6319 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6320 const_tree type)
6321 {
6322 int i;
6323
6324 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6325 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6326 return i;
6327
6328 return -1;
6329 }
6330
6331 static int
6332 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6333 {
6334 /* We aren't passed a decl, so we can't check that a call is local.
6335 However, it isn't clear that that would be a win anyway, since it
6336 might limit some tail-calling opportunities. */
6337 enum arm_pcs pcs_variant;
6338
6339 if (fntype)
6340 {
6341 const_tree fndecl = NULL_TREE;
6342
6343 if (TREE_CODE (fntype) == FUNCTION_DECL)
6344 {
6345 fndecl = fntype;
6346 fntype = TREE_TYPE (fntype);
6347 }
6348
6349 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6350 }
6351 else
6352 pcs_variant = arm_pcs_default;
6353
6354 if (pcs_variant != ARM_PCS_AAPCS)
6355 {
6356 int i;
6357
6358 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6359 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6360 TYPE_MODE (type),
6361 type))
6362 return i;
6363 }
6364 return -1;
6365 }
6366
6367 static rtx
6368 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6369 const_tree fntype)
6370 {
6371 /* We aren't passed a decl, so we can't check that a call is local.
6372 However, it isn't clear that that would be a win anyway, since it
6373 might limit some tail-calling opportunities. */
6374 enum arm_pcs pcs_variant;
6375 int unsignedp ATTRIBUTE_UNUSED;
6376
6377 if (fntype)
6378 {
6379 const_tree fndecl = NULL_TREE;
6380
6381 if (TREE_CODE (fntype) == FUNCTION_DECL)
6382 {
6383 fndecl = fntype;
6384 fntype = TREE_TYPE (fntype);
6385 }
6386
6387 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6388 }
6389 else
6390 pcs_variant = arm_pcs_default;
6391
6392 /* Promote integer types. */
6393 if (type && INTEGRAL_TYPE_P (type))
6394 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6395
6396 if (pcs_variant != ARM_PCS_AAPCS)
6397 {
6398 int i;
6399
6400 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6401 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6402 type))
6403 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6404 mode, type);
6405 }
6406
6407 	  /* Promote small structs returned in a register to full-word size
6408 for big-endian AAPCS. */
6409 if (type && arm_return_in_msb (type))
6410 {
6411 HOST_WIDE_INT size = int_size_in_bytes (type);
6412 if (size % UNITS_PER_WORD != 0)
6413 {
6414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6415 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6416 }
6417 }
6418
6419 return gen_rtx_REG (mode, R0_REGNUM);
6420 }
6421
6422 static rtx
6423 aapcs_libcall_value (machine_mode mode)
6424 {
6425 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6426 && GET_MODE_SIZE (mode) <= 4)
6427 mode = SImode;
6428
6429 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6430 }
6431
6432 /* Lay out a function argument using the AAPCS rules. The rule
6433 numbers referred to here are those in the AAPCS. */
6434 static void
6435 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6436 const_tree type, bool named)
6437 {
6438 int nregs, nregs2;
6439 int ncrn;
6440
6441 /* We only need to do this once per argument. */
6442 if (pcum->aapcs_arg_processed)
6443 return;
6444
6445 pcum->aapcs_arg_processed = true;
6446
6447 /* Special case: if named is false then we are handling an incoming
6448 anonymous argument which is on the stack. */
6449 if (!named)
6450 return;
6451
6452 /* Is this a potential co-processor register candidate? */
6453 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6454 {
6455 int slot = aapcs_select_call_coproc (pcum, mode, type);
6456 pcum->aapcs_cprc_slot = slot;
6457
6458 /* We don't have to apply any of the rules from part B of the
6459 		 preparation phase; these are handled elsewhere in the
6460 compiler. */
6461
6462 if (slot >= 0)
6463 {
6464 /* A Co-processor register candidate goes either in its own
6465 class of registers or on the stack. */
6466 if (!pcum->aapcs_cprc_failed[slot])
6467 {
6468 /* C1.cp - Try to allocate the argument to co-processor
6469 registers. */
6470 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6471 return;
6472
6473 /* C2.cp - Put the argument on the stack and note that we
6474 can't assign any more candidates in this slot. We also
6475 need to note that we have allocated stack space, so that
6476 we won't later try to split a non-cprc candidate between
6477 core registers and the stack. */
6478 pcum->aapcs_cprc_failed[slot] = true;
6479 pcum->can_split = false;
6480 }
6481
6482 /* We didn't get a register, so this argument goes on the
6483 stack. */
6484 gcc_assert (pcum->can_split == false);
6485 return;
6486 }
6487 }
6488
6489 /* C3 - For double-word aligned arguments, round the NCRN up to the
6490 next even number. */
6491 ncrn = pcum->aapcs_ncrn;
6492 if (ncrn & 1)
6493 {
6494 int res = arm_needs_doubleword_align (mode, type);
6495 /* Only warn during RTL expansion of call stmts, otherwise we would
6496 warn e.g. during gimplification even on functions that will be
6497 always inlined, and we'd warn multiple times. Don't warn when
6498 called in expand_function_start either, as we warn instead in
6499 arm_function_arg_boundary in that case. */
6500 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6501 inform (input_location, "parameter passing for argument of type "
6502 "%qT changed in GCC 7.1", type);
6503 else if (res > 0)
6504 ncrn++;
6505 }
6506
6507 	  nregs = ARM_NUM_REGS2 (mode, type);
6508
6509 /* Sigh, this test should really assert that nregs > 0, but a GCC
6510 extension allows empty structs and then gives them empty size; it
6511 then allows such a structure to be passed by value. For some of
6512 the code below we have to pretend that such an argument has
6513 non-zero size so that we 'locate' it correctly either in
6514 registers or on the stack. */
6515 gcc_assert (nregs >= 0);
6516
6517 nregs2 = nregs ? nregs : 1;
6518
6519 /* C4 - Argument fits entirely in core registers. */
6520 if (ncrn + nregs2 <= NUM_ARG_REGS)
6521 {
6522 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6523 pcum->aapcs_next_ncrn = ncrn + nregs;
6524 return;
6525 }
6526
6527 /* C5 - Some core registers left and there are no arguments already
6528 on the stack: split this argument between the remaining core
6529 registers and the stack. */
6530 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6531 {
6532 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6533 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6534 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6535 return;
6536 }
6537
6538 /* C6 - NCRN is set to 4. */
6539 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6540
6541 	  /* C7, C8 - argument goes on the stack.  We have nothing to do here.  */
6542 return;
6543 }
6544
6545 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6546 for a call to a function whose data type is FNTYPE.
6547 For a library call, FNTYPE is NULL. */
6548 void
6549 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6550 rtx libname,
6551 tree fndecl ATTRIBUTE_UNUSED)
6552 {
6553 /* Long call handling. */
6554 if (fntype)
6555 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6556 else
6557 pcum->pcs_variant = arm_pcs_default;
6558
6559 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6560 {
6561 if (arm_libcall_uses_aapcs_base (libname))
6562 pcum->pcs_variant = ARM_PCS_AAPCS;
6563
6564 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6565 pcum->aapcs_reg = NULL_RTX;
6566 pcum->aapcs_partial = 0;
6567 pcum->aapcs_arg_processed = false;
6568 pcum->aapcs_cprc_slot = -1;
6569 pcum->can_split = true;
6570
6571 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6572 {
6573 int i;
6574
6575 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6576 {
6577 pcum->aapcs_cprc_failed[i] = false;
6578 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6579 }
6580 }
6581 return;
6582 }
6583
6584 /* Legacy ABIs */
6585
6586 /* On the ARM, the offset starts at 0. */
6587 pcum->nregs = 0;
6588 pcum->iwmmxt_nregs = 0;
6589 pcum->can_split = true;
6590
6591 /* Varargs vectors are treated the same as long long.
6592 	     named_count avoids having to change the way arm handles 'named'.  */
6593 pcum->named_count = 0;
6594 pcum->nargs = 0;
6595
6596 if (TARGET_REALLY_IWMMXT && fntype)
6597 {
6598 tree fn_arg;
6599
6600 for (fn_arg = TYPE_ARG_TYPES (fntype);
6601 fn_arg;
6602 fn_arg = TREE_CHAIN (fn_arg))
6603 pcum->named_count += 1;
6604
6605 if (! pcum->named_count)
6606 pcum->named_count = INT_MAX;
6607 }
6608 }
6609
6610 /* Return 2 if double word alignment is required for argument passing,
6611 but wasn't required before the fix for PR88469.
6612 Return 1 if double word alignment is required for argument passing.
6613 Return -1 if double word alignment used to be required for argument
6614 passing before PR77728 ABI fix, but is not required anymore.
6615 	   Return 0 if double word alignment is not required and wasn't required
6616 before either. */
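/* For instance, a struct whose only member is a plain 'long long' field
   needs doubleword alignment (return 1), while one whose only over-aligned
   member is a bit-field declared with a 64-bit base type is the PR88469
   case and typically returns 2.  */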
6617 static int
6618 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6619 {
6620 if (!type)
6621 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6622
6623 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6624 if (!AGGREGATE_TYPE_P (type))
6625 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6626
6627 /* Array types: Use member alignment of element type. */
6628 if (TREE_CODE (type) == ARRAY_TYPE)
6629 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6630
6631 int ret = 0;
6632 int ret2 = 0;
6633 /* Record/aggregate types: Use greatest member alignment of any member. */
6634 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6635 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6636 {
6637 if (TREE_CODE (field) == FIELD_DECL)
6638 return 1;
6639 else
6640 /* Before PR77728 fix, we were incorrectly considering also
6641 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6642 Make sure we can warn about that with -Wpsabi. */
6643 ret = -1;
6644 }
6645 else if (TREE_CODE (field) == FIELD_DECL
6646 && DECL_BIT_FIELD_TYPE (field)
6647 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6648 ret2 = 1;
6649
6650 if (ret2)
6651 return 2;
6652
6653 return ret;
6654 }
6655
6656
6657 /* Determine where to put an argument to a function.
6658 Value is zero to push the argument on the stack,
6659 or a hard register in which to store the argument.
6660
6661 MODE is the argument's machine mode.
6662 TYPE is the data type of the argument (as a tree).
6663 This is null for libcalls where that information may
6664 not be available.
6665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6666 the preceding args and about the function being called.
6667 NAMED is nonzero if this argument is a named parameter
6668 (otherwise it is an extra parameter matching an ellipsis).
6669
6670 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6671 other arguments are passed on the stack. If (NAMED == 0) (which happens
6672 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6673 	   defined), say it is passed on the stack (function_prologue will
6674 	   indeed arrange for it to be passed on the stack if necessary).  */
6675
6676 static rtx
6677 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6678 const_tree type, bool named)
6679 {
6680 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6681 int nregs;
6682
6683 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6684 a call insn (op3 of a call_value insn). */
6685 if (mode == VOIDmode)
6686 return const0_rtx;
6687
6688 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6689 {
6690 aapcs_layout_arg (pcum, mode, type, named);
6691 return pcum->aapcs_reg;
6692 }
6693
6694 /* Varargs vectors are treated the same as long long.
6695 	     named_count avoids having to change the way arm handles 'named'.  */
6696 if (TARGET_IWMMXT_ABI
6697 && arm_vector_mode_supported_p (mode)
6698 && pcum->named_count > pcum->nargs + 1)
6699 {
6700 if (pcum->iwmmxt_nregs <= 9)
6701 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6702 else
6703 {
6704 pcum->can_split = false;
6705 return NULL_RTX;
6706 }
6707 }
6708
6709 /* Put doubleword aligned quantities in even register pairs. */
6710 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6711 {
6712 int res = arm_needs_doubleword_align (mode, type);
6713 if (res < 0 && warn_psabi)
6714 inform (input_location, "parameter passing for argument of type "
6715 "%qT changed in GCC 7.1", type);
6716 else if (res > 0)
6717 {
6718 pcum->nregs++;
6719 if (res > 1 && warn_psabi)
6720 inform (input_location, "parameter passing for argument of type "
6721 "%qT changed in GCC 9.1", type);
6722 }
6723 }
6724
6725 /* Only allow splitting an arg between regs and memory if all preceding
6726 args were allocated to regs. For args passed by reference we only count
6727 the reference pointer. */
6728 if (pcum->can_split)
6729 nregs = 1;
6730 else
6731 nregs = ARM_NUM_REGS2 (mode, type);
6732
6733 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6734 return NULL_RTX;
6735
6736 return gen_rtx_REG (mode, pcum->nregs);
6737 }
6738
6739 static unsigned int
6740 arm_function_arg_boundary (machine_mode mode, const_tree type)
6741 {
6742 if (!ARM_DOUBLEWORD_ALIGN)
6743 return PARM_BOUNDARY;
6744
6745 int res = arm_needs_doubleword_align (mode, type);
6746 if (res < 0 && warn_psabi)
6747 inform (input_location, "parameter passing for argument of type %qT "
6748 "changed in GCC 7.1", type);
6749 if (res > 1 && warn_psabi)
6750 inform (input_location, "parameter passing for argument of type "
6751 "%qT changed in GCC 9.1", type);
6752
6753 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6754 }
6755
6756 static int
6757 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6758 {
6759 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6760 int nregs = pcum->nregs;
6761
6762 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6763 {
6764 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6765 return pcum->aapcs_partial;
6766 }
6767
6768 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6769 return 0;
6770
6771 if (NUM_ARG_REGS > nregs
6772 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6773 && pcum->can_split)
6774 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6775
6776 return 0;
6777 }
6778
6779 /* Update the data in PCUM to advance over an argument
6780 of mode MODE and data type TYPE.
6781 (TYPE is null for libcalls where that information may not be available.) */
6782
6783 static void
6784 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6785 const_tree type, bool named)
6786 {
6787 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6788
6789 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6790 {
6791 aapcs_layout_arg (pcum, mode, type, named);
6792
6793 if (pcum->aapcs_cprc_slot >= 0)
6794 {
6795 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6796 type);
6797 pcum->aapcs_cprc_slot = -1;
6798 }
6799
6800 /* Generic stuff. */
6801 pcum->aapcs_arg_processed = false;
6802 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6803 pcum->aapcs_reg = NULL_RTX;
6804 pcum->aapcs_partial = 0;
6805 }
6806 else
6807 {
6808 pcum->nargs += 1;
6809 if (arm_vector_mode_supported_p (mode)
6810 && pcum->named_count > pcum->nargs
6811 && TARGET_IWMMXT_ABI)
6812 pcum->iwmmxt_nregs += 1;
6813 else
6814 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6815 }
6816 }
6817
6818 /* Variable sized types are passed by reference. This is a GCC
6819 extension to the ARM ABI. */
6820
6821 static bool
6822 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6823 {
6824 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
6825 }
6826 \f
6827 /* Encode the current state of the #pragma [no_]long_calls. */
6828 typedef enum
6829 {
6830 OFF, /* No #pragma [no_]long_calls is in effect. */
6831 LONG, /* #pragma long_calls is in effect. */
6832 SHORT /* #pragma no_long_calls is in effect. */
6833 } arm_pragma_enum;
6834
6835 static arm_pragma_enum arm_pragma_long_calls = OFF;
6836
6837 void
6838 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6839 {
6840 arm_pragma_long_calls = LONG;
6841 }
6842
6843 void
6844 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6845 {
6846 arm_pragma_long_calls = SHORT;
6847 }
6848
6849 void
6850 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6851 {
6852 arm_pragma_long_calls = OFF;
6853 }
6854 \f
6855 /* Handle an attribute requiring a FUNCTION_DECL;
6856 arguments as in struct attribute_spec.handler. */
6857 static tree
6858 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6859 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6860 {
6861 if (TREE_CODE (*node) != FUNCTION_DECL)
6862 {
6863 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6864 name);
6865 *no_add_attrs = true;
6866 }
6867
6868 return NULL_TREE;
6869 }
6870
6871 /* Handle an "interrupt" or "isr" attribute;
6872 arguments as in struct attribute_spec.handler. */
6873 static tree
6874 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6875 bool *no_add_attrs)
6876 {
6877 if (DECL_P (*node))
6878 {
6879 if (TREE_CODE (*node) != FUNCTION_DECL)
6880 {
6881 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6882 name);
6883 *no_add_attrs = true;
6884 }
6885 /* FIXME: the argument, if any, is checked for type attributes;
6886 should it also be checked for decl attributes? */
6887 }
6888 else
6889 {
6890 if (TREE_CODE (*node) == FUNCTION_TYPE
6891 || TREE_CODE (*node) == METHOD_TYPE)
6892 {
6893 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6894 {
6895 warning (OPT_Wattributes, "%qE attribute ignored",
6896 name);
6897 *no_add_attrs = true;
6898 }
6899 }
6900 else if (TREE_CODE (*node) == POINTER_TYPE
6901 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6902 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6903 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6904 {
6905 *node = build_variant_type_copy (*node);
6906 TREE_TYPE (*node) = build_type_attribute_variant
6907 (TREE_TYPE (*node),
6908 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6909 *no_add_attrs = true;
6910 }
6911 else
6912 {
6913 /* Possibly pass this attribute on from the type to a decl. */
6914 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6915 | (int) ATTR_FLAG_FUNCTION_NEXT
6916 | (int) ATTR_FLAG_ARRAY_NEXT))
6917 {
6918 *no_add_attrs = true;
6919 return tree_cons (name, args, NULL_TREE);
6920 }
6921 else
6922 {
6923 warning (OPT_Wattributes, "%qE attribute ignored",
6924 name);
6925 }
6926 }
6927 }
6928
6929 return NULL_TREE;
6930 }
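
/* For illustration (hypothetical user code): the attribute may name the
   kind of exception handler; an unrecognized name is ignored with a
   warning, as handled above.  */
#if 0
void handler (void) __attribute__ ((interrupt ("IRQ")));  /* accepted */
void broken (void) __attribute__ ((isr ("BOGUS")));       /* warning: attribute ignored */
#endif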
6931
6932 /* Handle a "pcs" attribute; arguments as in struct
6933 attribute_spec.handler. */
6934 static tree
6935 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6936 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6937 {
6938 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6939 {
6940 warning (OPT_Wattributes, "%qE attribute ignored", name);
6941 *no_add_attrs = true;
6942 }
6943 return NULL_TREE;
6944 }
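
/* For illustration (hypothetical user code): only known PCS variants are
   accepted; anything else is ignored with a warning.  */
#if 0
double vfp_sum (double, double) __attribute__ ((pcs ("aapcs-vfp")));  /* accepted */
double strange (double, double) __attribute__ ((pcs ("custom")));     /* warning: attribute ignored */
#endif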
6945
6946 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6947 /* Handle the "notshared" attribute. This attribute is another way of
6948 requesting hidden visibility. ARM's compiler supports
6949 "__declspec(notshared)"; we support the same thing via an
6950 attribute. */
6951
6952 static tree
6953 arm_handle_notshared_attribute (tree *node,
6954 tree name ATTRIBUTE_UNUSED,
6955 tree args ATTRIBUTE_UNUSED,
6956 int flags ATTRIBUTE_UNUSED,
6957 bool *no_add_attrs)
6958 {
6959 tree decl = TYPE_NAME (*node);
6960
6961 if (decl)
6962 {
6963 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6964 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6965 *no_add_attrs = false;
6966 }
6967 return NULL_TREE;
6968 }
6969 #endif
6970
6971 /* Return TRUE if a function with declaration FNDECL and type FNTYPE uses
6972 the stack to pass arguments or to return its value, and FALSE otherwise.
6973 This is used for functions with the 'cmse_nonsecure_call' or
6974 'cmse_nonsecure_entry' attributes; this function issues diagnostic
6975 messages if the stack is used. NAME is the name of the attribute
6976 used. */
6977
6978 static bool
6979 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6980 {
6981 function_args_iterator args_iter;
6982 CUMULATIVE_ARGS args_so_far_v;
6983 cumulative_args_t args_so_far;
6984 bool first_param = true;
6985 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6986
6987 /* Error out if any argument is passed on the stack. */
6988 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6989 args_so_far = pack_cumulative_args (&args_so_far_v);
6990 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6991 {
6992 rtx arg_rtx;
6993 machine_mode arg_mode = TYPE_MODE (arg_type);
6994
6995 prev_arg_type = arg_type;
6996 if (VOID_TYPE_P (arg_type))
6997 continue;
6998
6999 function_arg_info arg (arg_type, /*named=*/true);
7000 if (!first_param)
7001 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
7002 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
7003 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7004 {
7005 error ("%qE attribute not available to functions with arguments "
7006 "passed on the stack", name);
7007 return true;
7008 }
7009 first_param = false;
7010 }
7011
7012 /* Error out for variadic functions since we cannot control how many
7013 arguments will be passed and thus the stack could be used. stdarg_p () is
7014 not used for this check, to avoid walking the argument list twice. */
7015 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7016 {
7017 error ("%qE attribute not available to functions with variable number "
7018 "of arguments", name);
7019 return true;
7020 }
7021
7022 /* Error out if return value is passed on the stack. */
7023 ret_type = TREE_TYPE (fntype);
7024 if (arm_return_in_memory (ret_type, fntype))
7025 {
7026 error ("%qE attribute not available to functions that return value on "
7027 "the stack", name);
7028 return true;
7029 }
7030 return false;
7031 }
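
/* Illustrative sketch (hypothetical user code, compiled with -mcmse): an
   entry function whose arguments all fit in r0-r3 passes the check above,
   while one whose arguments spill onto the stack is rejected.  */
#if 0
int __attribute__ ((cmse_nonsecure_entry))
ok_entry (int a, int b, int c, int d)          /* all arguments in r0-r3 */
{
  return a + b + c + d;
}

int __attribute__ ((cmse_nonsecure_entry))
bad_entry (int a, int b, int c, int d, int e)  /* 'e' would go on the stack: error */
{
  return a + e;
}
#endif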
7032
7033 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7034 function will check whether the attribute is allowed here and will add the
7035 attribute to the function declaration tree or otherwise issue a warning. */
7036
7037 static tree
7038 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7039 tree /* args */,
7040 int /* flags */,
7041 bool *no_add_attrs)
7042 {
7043 tree fndecl;
7044
7045 if (!use_cmse)
7046 {
7047 *no_add_attrs = true;
7048 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7049 "option.", name);
7050 return NULL_TREE;
7051 }
7052
7053 /* Ignore attribute for function types. */
7054 if (TREE_CODE (*node) != FUNCTION_DECL)
7055 {
7056 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7057 name);
7058 *no_add_attrs = true;
7059 return NULL_TREE;
7060 }
7061
7062 fndecl = *node;
7063
7064 /* Warn for static linkage functions. */
7065 if (!TREE_PUBLIC (fndecl))
7066 {
7067 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7068 "with static linkage", name);
7069 *no_add_attrs = true;
7070 return NULL_TREE;
7071 }
7072
7073 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7074 TREE_TYPE (fndecl));
7075 return NULL_TREE;
7076 }
7077
7078
7079 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7080 function will check whether the attribute is allowed here and will add the
7081 attribute to the function type tree or otherwise issue a diagnostic. The
7082 reason we check this at declaration time is to only allow the use of the
7083 attribute with declarations of function pointers and not function
7084 declarations. This function checks NODE is of the expected type and issues
7085 diagnostics otherwise using NAME. If it is not of the expected type
7086 *NO_ADD_ATTRS will be set to true. */
7087
7088 static tree
7089 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7090 tree /* args */,
7091 int /* flags */,
7092 bool *no_add_attrs)
7093 {
7094 tree decl = NULL_TREE, fntype = NULL_TREE;
7095 tree type;
7096
7097 if (!use_cmse)
7098 {
7099 *no_add_attrs = true;
7100 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7101 "option.", name);
7102 return NULL_TREE;
7103 }
7104
7105 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7106 {
7107 decl = *node;
7108 fntype = TREE_TYPE (decl);
7109 }
7110
7111 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7112 fntype = TREE_TYPE (fntype);
7113
7114 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7115 {
7116 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7117 "function pointer", name);
7118 *no_add_attrs = true;
7119 return NULL_TREE;
7120 }
7121
7122 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7123
7124 if (*no_add_attrs)
7125 return NULL_TREE;
7126
7127 /* Prevent trees from being shared among function types with and without
7128 the cmse_nonsecure_call attribute. */
7129 type = TREE_TYPE (decl);
7130
7131 type = build_distinct_type_copy (type);
7132 TREE_TYPE (decl) = type;
7133 fntype = type;
7134
7135 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7136 {
7137 type = fntype;
7138 fntype = TREE_TYPE (fntype);
7139 fntype = build_distinct_type_copy (fntype);
7140 TREE_TYPE (type) = fntype;
7141 }
7142
7143 /* Construct a type attribute and add it to the function type. */
7144 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7145 TYPE_ATTRIBUTES (fntype));
7146 TYPE_ATTRIBUTES (fntype) = attrs;
7147 return NULL_TREE;
7148 }
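
/* For illustration (hypothetical user code, compiled with -mcmse): the
   attribute is accepted on the base type of a function pointer, not on a
   function declaration, matching the checks above.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) ns_callback_t (int);
ns_callback_t *callback;                             /* accepted */
void __attribute__ ((cmse_nonsecure_call)) f (int);  /* warning: only applies to the
                                                        base type of a function pointer */
#endif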
7149
7150 /* Return 0 if the attributes for two types are incompatible, 1 if they
7151 are compatible, and 2 if they are nearly compatible (which causes a
7152 warning to be generated). */
7153 static int
7154 arm_comp_type_attributes (const_tree type1, const_tree type2)
7155 {
7156 int l1, l2, s1, s2;
7157
7158 /* Check for mismatch of non-default calling convention. */
7159 if (TREE_CODE (type1) != FUNCTION_TYPE)
7160 return 1;
7161
7162 /* Check for mismatched call attributes. */
7163 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7164 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7165 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7166 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7167
7168 /* Only bother to check if an attribute is defined. */
7169 if (l1 | l2 | s1 | s2)
7170 {
7171 /* If one type has an attribute, the other must have the same attribute. */
7172 if ((l1 != l2) || (s1 != s2))
7173 return 0;
7174
7175 /* Disallow mixed attributes. */
7176 if ((l1 & s2) || (l2 & s1))
7177 return 0;
7178 }
7179
7180 /* Check for mismatched ISR attribute. */
7181 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7182 if (! l1)
7183 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7184 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7185 if (! l2)
7186 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7187 if (l1 != l2)
7188 return 0;
7189
7190 l1 = lookup_attribute ("cmse_nonsecure_call",
7191 TYPE_ATTRIBUTES (type1)) != NULL;
7192 l2 = lookup_attribute ("cmse_nonsecure_call",
7193 TYPE_ATTRIBUTES (type2)) != NULL;
7194
7195 if (l1 != l2)
7196 return 0;
7197
7198 return 1;
7199 }
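
/* For illustration (hypothetical user code): function types that differ in
   their long_call/short_call (or isr/cmse) attributes compare as
   incompatible here, so mixing them is diagnosed by the front end.  */
#if 0
void far_fn (void) __attribute__ ((long_call));
void (*plain_ptr) (void) = far_fn;   /* diagnosed: attribute mismatch */
#endif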
7200
7201 /* Assign default attributes to a newly defined type. This is used to
7202 set the short_call/long_call attributes for function types of
7203 functions defined inside the corresponding #pragma scopes. */
7204 static void
7205 arm_set_default_type_attributes (tree type)
7206 {
7207 /* Add __attribute__ ((long_call)) to all functions when inside
7208 #pragma long_calls, or __attribute__ ((short_call)) when inside
7209 #pragma no_long_calls. */
7210 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7211 {
7212 tree type_attr_list, attr_name;
7213 type_attr_list = TYPE_ATTRIBUTES (type);
7214
7215 if (arm_pragma_long_calls == LONG)
7216 attr_name = get_identifier ("long_call");
7217 else if (arm_pragma_long_calls == SHORT)
7218 attr_name = get_identifier ("short_call");
7219 else
7220 return;
7221
7222 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7223 TYPE_ATTRIBUTES (type) = type_attr_list;
7224 }
7225 }
7226 \f
7227 /* Return true if DECL is known to be linked into section SECTION. */
7228
7229 static bool
7230 arm_function_in_section_p (tree decl, section *section)
7231 {
7232 /* We can only be certain about the prevailing symbol definition. */
7233 if (!decl_binds_to_current_def_p (decl))
7234 return false;
7235
7236 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7237 if (!DECL_SECTION_NAME (decl))
7238 {
7239 /* Make sure that we will not create a unique section for DECL. */
7240 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7241 return false;
7242 }
7243
7244 return function_section (decl) == section;
7245 }
7246
7247 /* Return nonzero if a 32-bit "long_call" should be generated for
7248 a call from the current function to DECL. We generate a long_call
7249 if the function:
7250
7251 a. has an __attribute__ ((long_call))
7252 or b. is within the scope of a #pragma long_calls
7253 or c. the -mlong-calls command line switch has been specified
7254
7255 However we do not generate a long call if the function:
7256
7257 d. has an __attribute__ ((short_call))
7258 or e. is inside the scope of a #pragma no_long_calls
7259 or f. is defined in the same section as the current function. */
7260
7261 bool
7262 arm_is_long_call_p (tree decl)
7263 {
7264 tree attrs;
7265
7266 if (!decl)
7267 return TARGET_LONG_CALLS;
7268
7269 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7270 if (lookup_attribute ("short_call", attrs))
7271 return false;
7272
7273 /* For "f", be conservative, and only cater for cases in which the
7274 whole of the current function is placed in the same section. */
7275 if (!flag_reorder_blocks_and_partition
7276 && TREE_CODE (decl) == FUNCTION_DECL
7277 && arm_function_in_section_p (decl, current_function_section ()))
7278 return false;
7279
7280 if (lookup_attribute ("long_call", attrs))
7281 return true;
7282
7283 return TARGET_LONG_CALLS;
7284 }
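
/* For illustration (hypothetical user code): per-declaration attributes
   override the prevailing #pragma or -mlong-calls default, as checked
   above.  */
#if 0
void remote (void) __attribute__ ((long_call));   /* always called via a long call */
void local (void) __attribute__ ((short_call));   /* never called via a long call */
#endif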
7285
7286 /* Return nonzero if it is ok to make a tail-call to DECL. */
7287 static bool
7288 arm_function_ok_for_sibcall (tree decl, tree exp)
7289 {
7290 unsigned long func_type;
7291
7292 if (cfun->machine->sibcall_blocked)
7293 return false;
7294
7295 /* Never tailcall something if we are generating code for Thumb-1. */
7296 if (TARGET_THUMB1)
7297 return false;
7298
7299 /* The PIC register is live on entry to VxWorks PLT entries, so we
7300 must make the call before restoring the PIC register. */
7301 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7302 return false;
7303
7304 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7305 may be used both as target of the call and base register for restoring
7306 the VFP registers */
7307 if (TARGET_APCS_FRAME && TARGET_ARM
7308 && TARGET_HARD_FLOAT
7309 && decl && arm_is_long_call_p (decl))
7310 return false;
7311
7312 /* If we are interworking and the function is not declared static
7313 then we can't tail-call it unless we know that it exists in this
7314 compilation unit (since it might be a Thumb routine). */
7315 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7316 && !TREE_ASM_WRITTEN (decl))
7317 return false;
7318
7319 func_type = arm_current_func_type ();
7320 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7321 if (IS_INTERRUPT (func_type))
7322 return false;
7323
7324 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7325 generated for entry functions themselves. */
7326 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7327 return false;
7328
7329 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7330 this would complicate matters for later code generation. */
7331 if (TREE_CODE (exp) == CALL_EXPR)
7332 {
7333 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7334 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7335 return false;
7336 }
7337
7338 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7339 {
7340 /* Check that the return value locations are the same. For
7341 example that we aren't returning a value from the sibling in
7342 a VFP register but then need to transfer it to a core
7343 register. */
7344 rtx a, b;
7345 tree decl_or_type = decl;
7346
7347 /* If it is an indirect function pointer, get the function type. */
7348 if (!decl)
7349 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7350
7351 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7352 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7353 cfun->decl, false);
7354 if (!rtx_equal_p (a, b))
7355 return false;
7356 }
7357
7358 /* Never tailcall if function may be called with a misaligned SP. */
7359 if (IS_STACKALIGN (func_type))
7360 return false;
7361
7362 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7363 references should become a NOP. Don't convert such calls into
7364 sibling calls. */
7365 if (TARGET_AAPCS_BASED
7366 && arm_abi == ARM_ABI_AAPCS
7367 && decl
7368 && DECL_WEAK (decl))
7369 return false;
7370
7371 /* We cannot do a tailcall for an indirect call by descriptor if all the
7372 argument registers are used because the only register left to load the
7373 address is IP and it will already contain the static chain. */
7374 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7375 {
7376 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7377 CUMULATIVE_ARGS cum;
7378 cumulative_args_t cum_v;
7379
7380 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7381 cum_v = pack_cumulative_args (&cum);
7382
7383 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7384 {
7385 tree type = TREE_VALUE (t);
7386 if (!VOID_TYPE_P (type))
7387 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7388 }
7389
7390 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7391 return false;
7392 }
7393
7394 /* Everything else is ok. */
7395 return true;
7396 }
7397
7398 \f
7399 /* Addressing mode support functions. */
7400
7401 /* Return nonzero if X is a legitimate immediate operand when compiling
7402 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7403 int
7404 legitimate_pic_operand_p (rtx x)
7405 {
7406 if (GET_CODE (x) == SYMBOL_REF
7407 || (GET_CODE (x) == CONST
7408 && GET_CODE (XEXP (x, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7410 return 0;
7411
7412 return 1;
7413 }
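
/* A minimal sketch of the intent (illustration only): symbolic constants
   must be loaded via the GOT under PIC, whereas plain integers are usable
   as-is.  */
#if 0
  legitimate_pic_operand_p (GEN_INT (42));                     /* 1: usable as-is */
  legitimate_pic_operand_p (gen_rtx_SYMBOL_REF (Pmode, "x"));  /* 0: must go via the GOT */
#endif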
7414
7415 /* Record that the current function needs a PIC register. If PIC_REG is null,
7416 a new pseudo is allocated as the PIC register, otherwise PIC_REG is used. In
7417 both cases cfun->machine->pic_reg is initialized if we have not already done
7418 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7419 the PIC register is reloaded in the current position of the instruction stream
7420 regardless of whether it was loaded before. Otherwise, it is only loaded if
7421 not already done so (crtl->uses_pic_offset_table is null). Note that a
7422 nonnull PIC_REG is only supported when COMPUTE_NOW is true and a null PIC_REG
7423 is only supported when COMPUTE_NOW is false. */
7424
7425 static void
7426 require_pic_register (rtx pic_reg, bool compute_now)
7427 {
7428 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7429
7430 /* A lot of the logic here is made obscure by the fact that this
7431 routine gets called as part of the rtx cost estimation process.
7432 We don't want those calls to affect any assumptions about the real
7433 function; and further, we can't call entry_of_function() until we
7434 start the real expansion process. */
7435 if (!crtl->uses_pic_offset_table || compute_now)
7436 {
7437 gcc_assert (can_create_pseudo_p ()
7438 || (pic_reg != NULL_RTX
7439 && REG_P (pic_reg)
7440 && GET_MODE (pic_reg) == Pmode));
7441 if (arm_pic_register != INVALID_REGNUM
7442 && !compute_now
7443 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7444 {
7445 if (!cfun->machine->pic_reg)
7446 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7447
7448 /* Play games to avoid marking the function as needing pic
7449 if we are being called as part of the cost-estimation
7450 process. */
7451 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7452 crtl->uses_pic_offset_table = 1;
7453 }
7454 else
7455 {
7456 rtx_insn *seq, *insn;
7457
7458 if (pic_reg == NULL_RTX)
7459 pic_reg = gen_reg_rtx (Pmode);
7460 if (!cfun->machine->pic_reg)
7461 cfun->machine->pic_reg = pic_reg;
7462
7463 /* Play games to avoid marking the function as needing pic
7464 if we are being called as part of the cost-estimation
7465 process. */
7466 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7467 {
7468 crtl->uses_pic_offset_table = 1;
7469 start_sequence ();
7470
7471 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7472 && arm_pic_register > LAST_LO_REGNUM
7473 && !compute_now)
7474 emit_move_insn (cfun->machine->pic_reg,
7475 gen_rtx_REG (Pmode, arm_pic_register));
7476 else
7477 arm_load_pic_register (0UL, pic_reg);
7478
7479 seq = get_insns ();
7480 end_sequence ();
7481
7482 for (insn = seq; insn; insn = NEXT_INSN (insn))
7483 if (INSN_P (insn))
7484 INSN_LOCATION (insn) = prologue_location;
7485
7486 /* We can be called during expansion of PHI nodes, where
7487 we can't yet emit instructions directly in the final
7488 insn stream. Queue the insns on the entry edge, they will
7489 be committed after everything else is expanded. */
7490 if (currently_expanding_to_rtl)
7491 insert_insn_on_edge (seq,
7492 single_succ_edge
7493 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7494 else
7495 emit_insn (seq);
7496 }
7497 }
7498 }
7499 }
7500
7501 /* Legitimize the PIC load of ORIG into REG. If REG is NULL, a new pseudo is
7502 created to hold the result of the load. If not NULL, PIC_REG indicates
7503 which register to use as the PIC register, otherwise that choice is left
7504 to the register allocator. COMPUTE_NOW forces the PIC register to be loaded
7505 at the current location in the instruction stream, regardless of whether it
7506 was loaded previously. Note that a nonnull PIC_REG is only supported when
7507 COMPUTE_NOW is true and a null PIC_REG is only supported when COMPUTE_NOW is false.
7508
7509 Returns the register REG into which the PIC load is performed. */
7510
7511 rtx
7512 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7513 bool compute_now)
7514 {
7515 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7516
7517 if (GET_CODE (orig) == SYMBOL_REF
7518 || GET_CODE (orig) == LABEL_REF)
7519 {
7520 if (reg == 0)
7521 {
7522 gcc_assert (can_create_pseudo_p ());
7523 reg = gen_reg_rtx (Pmode);
7524 }
7525
7526 /* VxWorks does not impose a fixed gap between segments; the run-time
7527 gap can be different from the object-file gap. We therefore can't
7528 use GOTOFF unless we are absolutely sure that the symbol is in the
7529 same segment as the GOT. Unfortunately, the flexibility of linker
7530 scripts means that we can't be sure of that in general, so assume
7531 that GOTOFF is never valid on VxWorks. */
7532 /* References to weak symbols cannot be resolved locally: they
7533 may be overridden by a non-weak definition at link time. */
7534 rtx_insn *insn;
7535 if ((GET_CODE (orig) == LABEL_REF
7536 || (GET_CODE (orig) == SYMBOL_REF
7537 && SYMBOL_REF_LOCAL_P (orig)
7538 && (SYMBOL_REF_DECL (orig)
7539 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7540 && NEED_GOT_RELOC
7541 && arm_pic_data_is_text_relative)
7542 insn = arm_pic_static_addr (orig, reg);
7543 else
7544 {
7545 rtx pat;
7546 rtx mem;
7547
7548 /* If this function doesn't have a pic register, create one now. */
7549 require_pic_register (pic_reg, compute_now);
7550
7551 if (pic_reg == NULL_RTX)
7552 pic_reg = cfun->machine->pic_reg;
7553
7554 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7555
7556 /* Make the MEM as close to a constant as possible. */
7557 mem = SET_SRC (pat);
7558 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7559 MEM_READONLY_P (mem) = 1;
7560 MEM_NOTRAP_P (mem) = 1;
7561
7562 insn = emit_insn (pat);
7563 }
7564
7565 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7566 by loop. */
7567 set_unique_reg_note (insn, REG_EQUAL, orig);
7568
7569 return reg;
7570 }
7571 else if (GET_CODE (orig) == CONST)
7572 {
7573 rtx base, offset;
7574
7575 if (GET_CODE (XEXP (orig, 0)) == PLUS
7576 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7577 return orig;
7578
7579 /* Handle the case where we have: const (UNSPEC_TLS). */
7580 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7581 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7582 return orig;
7583
7584 /* Handle the case where we have:
7585 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7586 CONST_INT. */
7587 if (GET_CODE (XEXP (orig, 0)) == PLUS
7588 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7589 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7590 {
7591 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7592 return orig;
7593 }
7594
7595 if (reg == 0)
7596 {
7597 gcc_assert (can_create_pseudo_p ());
7598 reg = gen_reg_rtx (Pmode);
7599 }
7600
7601 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7602
7603 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7604 pic_reg, compute_now);
7605 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7606 base == reg ? 0 : reg, pic_reg,
7607 compute_now);
7608
7609 if (CONST_INT_P (offset))
7610 {
7611 /* The base register doesn't really matter, we only want to
7612 test the index for the appropriate mode. */
7613 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7614 {
7615 gcc_assert (can_create_pseudo_p ());
7616 offset = force_reg (Pmode, offset);
7617 }
7618
7619 if (CONST_INT_P (offset))
7620 return plus_constant (Pmode, base, INTVAL (offset));
7621 }
7622
7623 if (GET_MODE_SIZE (mode) > 4
7624 && (GET_MODE_CLASS (mode) == MODE_INT
7625 || TARGET_SOFT_FLOAT))
7626 {
7627 emit_insn (gen_addsi3 (reg, base, offset));
7628 return reg;
7629 }
7630
7631 return gen_rtx_PLUS (Pmode, base, offset);
7632 }
7633
7634 return orig;
7635 }
7636
7637
7638 /* Whether a register is callee saved or not. This is necessary because, when
7639 optimizing for size on Thumb-1 targets, high registers are marked as caller
7640 saved despite being callee saved, in order to avoid using them. */
7641 #define callee_saved_reg_p(reg) \
7642 (!call_used_regs[reg] \
7643 || (TARGET_THUMB1 && optimize_size \
7644 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7645
7646 /* Return a mask for the call-clobbered low registers that are unused
7647 at the end of the prologue. */
7648 static unsigned long
7649 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7650 {
7651 unsigned long mask = 0;
7652 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7653
7654 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7655 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7656 mask |= 1 << (reg - FIRST_LO_REGNUM);
7657 return mask;
7658 }
7659
7660 /* Similarly for the start of the epilogue. */
7661 static unsigned long
7662 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7663 {
7664 unsigned long mask = 0;
7665 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7666
7667 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7668 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7669 mask |= 1 << (reg - FIRST_LO_REGNUM);
7670 return mask;
7671 }
7672
7673 /* Find a spare register to use during the prologue of a function. */
7674
7675 static int
7676 thumb_find_work_register (unsigned long pushed_regs_mask)
7677 {
7678 int reg;
7679
7680 unsigned long unused_regs
7681 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7682
7683 /* Check the argument registers first as these are call-used. The
7684 register allocation order means that sometimes r3 might be used
7685 but earlier argument registers might not, so check them all. */
7686 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7687 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7688 return reg;
7689
7690 /* Otherwise look for a call-saved register that is going to be pushed. */
7691 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7692 if (pushed_regs_mask & (1 << reg))
7693 return reg;
7694
7695 if (TARGET_THUMB2)
7696 {
7697 /* Thumb-2 can use high regs. */
7698 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7699 if (pushed_regs_mask & (1 << reg))
7700 return reg;
7701 }
7702 /* Something went wrong - thumb_compute_save_reg_mask()
7703 should have arranged for a suitable register to be pushed. */
7704 gcc_unreachable ();
7705 }
7706
7707 static GTY(()) int pic_labelno;
7708
7709 /* Generate code to load the PIC register. In Thumb-1 mode, SAVED_REGS is
7710 used to find a spare low register to use as a scratch. */
7711
7712 void
7713 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7714 {
7715 rtx l1, labelno, pic_tmp, pic_rtx;
7716
7717 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7718 return;
7719
7720 gcc_assert (flag_pic);
7721
7722 if (pic_reg == NULL_RTX)
7723 pic_reg = cfun->machine->pic_reg;
7724 if (TARGET_VXWORKS_RTP)
7725 {
7726 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7727 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7728 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7729
7730 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7731
7732 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7733 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7734 }
7735 else
7736 {
7737 /* We use an UNSPEC rather than a LABEL_REF because this label
7738 never appears in the code stream. */
7739
7740 labelno = GEN_INT (pic_labelno++);
7741 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7742 l1 = gen_rtx_CONST (VOIDmode, l1);
7743
7744 /* On the ARM the PC register contains 'dot + 8' at the time of the
7745 addition, on the Thumb it is 'dot + 4'. */
7746 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7747 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7748 UNSPEC_GOTSYM_OFF);
7749 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7750
7751 if (TARGET_32BIT)
7752 {
7753 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7754 }
7755 else /* TARGET_THUMB1 */
7756 {
7757 if (arm_pic_register != INVALID_REGNUM
7758 && REGNO (pic_reg) > LAST_LO_REGNUM)
7759 {
7760 /* We will have pushed the pic register, so we should always be
7761 able to find a work register. */
7762 pic_tmp = gen_rtx_REG (SImode,
7763 thumb_find_work_register (saved_regs));
7764 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7765 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7766 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7767 }
7768 else if (arm_pic_register != INVALID_REGNUM
7769 && arm_pic_register > LAST_LO_REGNUM
7770 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7771 {
7772 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7773 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7774 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7775 }
7776 else
7777 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7778 }
7779 }
7780
7781 /* Need to emit this whether or not we obey regdecls,
7782 since setjmp/longjmp can cause life info to screw up. */
7783 emit_use (pic_reg);
7784 }
7785
7786 /* Generate code to load the address of a static var when flag_pic is set. */
7787 static rtx_insn *
7788 arm_pic_static_addr (rtx orig, rtx reg)
7789 {
7790 rtx l1, labelno, offset_rtx;
7791
7792 gcc_assert (flag_pic);
7793
7794 /* We use an UNSPEC rather than a LABEL_REF because this label
7795 never appears in the code stream. */
7796 labelno = GEN_INT (pic_labelno++);
7797 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7798 l1 = gen_rtx_CONST (VOIDmode, l1);
7799
7800 /* On the ARM the PC register contains 'dot + 8' at the time of the
7801 addition, on the Thumb it is 'dot + 4'. */
7802 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7803 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7804 UNSPEC_SYMBOL_OFFSET);
7805 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7806
7807 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7808 }
7809
7810 /* Return nonzero if X is valid as an ARM state addressing register. */
7811 static int
7812 arm_address_register_rtx_p (rtx x, int strict_p)
7813 {
7814 int regno;
7815
7816 if (!REG_P (x))
7817 return 0;
7818
7819 regno = REGNO (x);
7820
7821 if (strict_p)
7822 return ARM_REGNO_OK_FOR_BASE_P (regno);
7823
7824 return (regno <= LAST_ARM_REGNUM
7825 || regno >= FIRST_PSEUDO_REGISTER
7826 || regno == FRAME_POINTER_REGNUM
7827 || regno == ARG_POINTER_REGNUM);
7828 }
7829
7830 /* Return TRUE if this rtx is the difference of a symbol and a label,
7831 and will reduce to a PC-relative relocation in the object file.
7832 Expressions like this can be left alone when generating PIC, rather
7833 than forced through the GOT. */
7834 static int
7835 pcrel_constant_p (rtx x)
7836 {
7837 if (GET_CODE (x) == MINUS)
7838 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7839
7840 return FALSE;
7841 }
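
/* For illustration, an expression of the form
     (minus (symbol_ref ("sym")) (label_ref L))
   satisfies the check above and can be resolved with a PC-relative
   relocation instead of a GOT entry.  */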
7842
7843 /* Return true if X will surely end up in an index register after the next
7844 splitting pass. */
7845 static bool
7846 will_be_in_index_register (const_rtx x)
7847 {
7848 /* arm.md: calculate_pic_address will split this into a register. */
7849 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7850 }
7851
7852 /* Return nonzero if X is a valid ARM state address operand. */
7853 int
7854 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7855 int strict_p)
7856 {
7857 bool use_ldrd;
7858 enum rtx_code code = GET_CODE (x);
7859
7860 if (arm_address_register_rtx_p (x, strict_p))
7861 return 1;
7862
7863 use_ldrd = (TARGET_LDRD
7864 && (mode == DImode || mode == DFmode));
7865
7866 if (code == POST_INC || code == PRE_DEC
7867 || ((code == PRE_INC || code == POST_DEC)
7868 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7869 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7870
7871 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7872 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7873 && GET_CODE (XEXP (x, 1)) == PLUS
7874 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7875 {
7876 rtx addend = XEXP (XEXP (x, 1), 1);
7877
7878 /* Don't allow ldrd post increment by register because it's hard
7879 to fixup invalid register choices. */
7880 if (use_ldrd
7881 && GET_CODE (x) == POST_MODIFY
7882 && REG_P (addend))
7883 return 0;
7884
7885 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7886 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7887 }
7888
7889 /* After reload constants split into minipools will have addresses
7890 from a LABEL_REF. */
7891 else if (reload_completed
7892 && (code == LABEL_REF
7893 || (code == CONST
7894 && GET_CODE (XEXP (x, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7896 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7897 return 1;
7898
7899 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7900 return 0;
7901
7902 else if (code == PLUS)
7903 {
7904 rtx xop0 = XEXP (x, 0);
7905 rtx xop1 = XEXP (x, 1);
7906
7907 return ((arm_address_register_rtx_p (xop0, strict_p)
7908 && ((CONST_INT_P (xop1)
7909 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7910 || (!strict_p && will_be_in_index_register (xop1))))
7911 || (arm_address_register_rtx_p (xop1, strict_p)
7912 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7913 }
7914
7915 #if 0
7916 /* Reload currently can't handle MINUS, so disable this for now */
7917 else if (GET_CODE (x) == MINUS)
7918 {
7919 rtx xop0 = XEXP (x, 0);
7920 rtx xop1 = XEXP (x, 1);
7921
7922 return (arm_address_register_rtx_p (xop0, strict_p)
7923 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7924 }
7925 #endif
7926
7927 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7928 && code == SYMBOL_REF
7929 && CONSTANT_POOL_ADDRESS_P (x)
7930 && ! (flag_pic
7931 && symbol_mentioned_p (get_pool_constant (x))
7932 && ! pcrel_constant_p (get_pool_constant (x))))
7933 return 1;
7934
7935 return 0;
7936 }
7937
7938 /* Return true if we can avoid creating a constant pool entry for x. */
7939 static bool
7940 can_avoid_literal_pool_for_label_p (rtx x)
7941 {
7942 /* Normally we can assign constant values to target registers without
7943 the help of the constant pool. But there are cases where we have to use
7944 the constant pool, such as:
7945 1) assigning a label to a register;
7946 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7947
7948 A constant pool access of the form:
7949 (set (reg r0) (mem (symbol_ref (".LC0"))))
7950 will cause the use of the literal pool (later, in arm_reorg).
7951 So here we mark such a form as invalid, and the compiler will then
7952 adjust it into:
7953 (set (reg r0) (symbol_ref (".LC0")))
7954 (set (reg r0) (mem (reg r0))).
7955 No extra register is required, and (mem (reg r0)) won't cause the use
7956 of literal pools. */
7957 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7958 && CONSTANT_POOL_ADDRESS_P (x))
7959 return 1;
7960 return 0;
7961 }
7962
7963
7964 /* Return nonzero if X is a valid Thumb-2 address operand. */
7965 static int
7966 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7967 {
7968 bool use_ldrd;
7969 enum rtx_code code = GET_CODE (x);
7970
7971 if (arm_address_register_rtx_p (x, strict_p))
7972 return 1;
7973
7974 use_ldrd = (TARGET_LDRD
7975 && (mode == DImode || mode == DFmode));
7976
7977 if (code == POST_INC || code == PRE_DEC
7978 || ((code == PRE_INC || code == POST_DEC)
7979 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7980 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7981
7982 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7983 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7984 && GET_CODE (XEXP (x, 1)) == PLUS
7985 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7986 {
7987 /* Thumb-2 only has autoincrement by constant. */
7988 rtx addend = XEXP (XEXP (x, 1), 1);
7989 HOST_WIDE_INT offset;
7990
7991 if (!CONST_INT_P (addend))
7992 return 0;
7993
7994 offset = INTVAL (addend);
7995 if (GET_MODE_SIZE (mode) <= 4)
7996 return (offset > -256 && offset < 256);
7997
7998 return (use_ldrd && offset > -1024 && offset < 1024
7999 && (offset & 3) == 0);
8000 }
8001
8002 /* After reload constants split into minipools will have addresses
8003 from a LABEL_REF. */
8004 else if (reload_completed
8005 && (code == LABEL_REF
8006 || (code == CONST
8007 && GET_CODE (XEXP (x, 0)) == PLUS
8008 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8009 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8010 return 1;
8011
8012 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8013 return 0;
8014
8015 else if (code == PLUS)
8016 {
8017 rtx xop0 = XEXP (x, 0);
8018 rtx xop1 = XEXP (x, 1);
8019
8020 return ((arm_address_register_rtx_p (xop0, strict_p)
8021 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8022 || (!strict_p && will_be_in_index_register (xop1))))
8023 || (arm_address_register_rtx_p (xop1, strict_p)
8024 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8025 }
8026
8027 else if (can_avoid_literal_pool_for_label_p (x))
8028 return 0;
8029
8030 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8031 && code == SYMBOL_REF
8032 && CONSTANT_POOL_ADDRESS_P (x)
8033 && ! (flag_pic
8034 && symbol_mentioned_p (get_pool_constant (x))
8035 && ! pcrel_constant_p (get_pool_constant (x))))
8036 return 1;
8037
8038 return 0;
8039 }
8040
8041 /* Return nonzero if INDEX is valid for an address index operand in
8042 ARM state. */
8043 static int
8044 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8045 int strict_p)
8046 {
8047 HOST_WIDE_INT range;
8048 enum rtx_code code = GET_CODE (index);
8049
8050 /* Standard coprocessor addressing modes. */
8051 if (TARGET_HARD_FLOAT
8052 && (mode == SFmode || mode == DFmode))
8053 return (code == CONST_INT && INTVAL (index) < 1024
8054 && INTVAL (index) > -1024
8055 && (INTVAL (index) & 3) == 0);
8056
8057 /* For quad modes, we restrict the constant offset to be slightly less
8058 than what the instruction format permits. We do this because for
8059 quad mode moves, we will actually decompose them into two separate
8060 double-mode reads or writes. INDEX must therefore be a valid
8061 (double-mode) offset and so should INDEX+8. */
8062 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8063 return (code == CONST_INT
8064 && INTVAL (index) < 1016
8065 && INTVAL (index) > -1024
8066 && (INTVAL (index) & 3) == 0);
8067
8068 /* We have no such constraint on double mode offsets, so we permit the
8069 full range of the instruction format. */
8070 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8071 return (code == CONST_INT
8072 && INTVAL (index) < 1024
8073 && INTVAL (index) > -1024
8074 && (INTVAL (index) & 3) == 0);
8075
8076 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8077 return (code == CONST_INT
8078 && INTVAL (index) < 1024
8079 && INTVAL (index) > -1024
8080 && (INTVAL (index) & 3) == 0);
8081
8082 if (arm_address_register_rtx_p (index, strict_p)
8083 && (GET_MODE_SIZE (mode) <= 4))
8084 return 1;
8085
8086 if (mode == DImode || mode == DFmode)
8087 {
8088 if (code == CONST_INT)
8089 {
8090 HOST_WIDE_INT val = INTVAL (index);
8091
8092 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8093 If vldr is selected it uses arm_coproc_mem_operand. */
8094 if (TARGET_LDRD)
8095 return val > -256 && val < 256;
8096 else
8097 return val > -4096 && val < 4092;
8098 }
8099
8100 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8101 }
8102
8103 if (GET_MODE_SIZE (mode) <= 4
8104 && ! (arm_arch4
8105 && (mode == HImode
8106 || mode == HFmode
8107 || (mode == QImode && outer == SIGN_EXTEND))))
8108 {
8109 if (code == MULT)
8110 {
8111 rtx xiop0 = XEXP (index, 0);
8112 rtx xiop1 = XEXP (index, 1);
8113
8114 return ((arm_address_register_rtx_p (xiop0, strict_p)
8115 && power_of_two_operand (xiop1, SImode))
8116 || (arm_address_register_rtx_p (xiop1, strict_p)
8117 && power_of_two_operand (xiop0, SImode)));
8118 }
8119 else if (code == LSHIFTRT || code == ASHIFTRT
8120 || code == ASHIFT || code == ROTATERT)
8121 {
8122 rtx op = XEXP (index, 1);
8123
8124 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8125 && CONST_INT_P (op)
8126 && INTVAL (op) > 0
8127 && INTVAL (op) <= 31);
8128 }
8129 }
8130
8131 /* For ARM v4 we may be doing a sign-extend operation during the
8132 load. */
8133 if (arm_arch4)
8134 {
8135 if (mode == HImode
8136 || mode == HFmode
8137 || (outer == SIGN_EXTEND && mode == QImode))
8138 range = 256;
8139 else
8140 range = 4096;
8141 }
8142 else
8143 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8144
8145 return (code == CONST_INT
8146 && INTVAL (index) < range
8147 && INTVAL (index) > -range);
8148 }
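
/* Worked examples for the ARM-state checks above (illustration only,
   SImode unless noted):
     [r0, #4095]       immediate within +/-4095                   -> accepted
     [r0, #300]        HImode ARMv4 halfword access, beyond +/-255 -> rejected
     [r0, r1, lsl #2]  (plus r0 (mult r1 4)) scaled register       -> accepted
     [r0, #-1020]      DFmode VFP access, multiple of 4 in range   -> accepted  */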
8149
8150 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8151 index operand, i.e. 1, 2, 4 or 8. */
8152 static bool
8153 thumb2_index_mul_operand (rtx op)
8154 {
8155 HOST_WIDE_INT val;
8156
8157 if (!CONST_INT_P (op))
8158 return false;
8159
8160 val = INTVAL (op);
8161 return (val == 1 || val == 2 || val == 4 || val == 8);
8162 }
8163
8164 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8165 static int
8166 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8167 {
8168 enum rtx_code code = GET_CODE (index);
8169
8170 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8171 /* Standard coprocessor addressing modes. */
8172 if (TARGET_HARD_FLOAT
8173 && (mode == SFmode || mode == DFmode))
8174 return (code == CONST_INT && INTVAL (index) < 1024
8175 /* Thumb-2 only allows an index range greater than -256 for its core
8176 register loads/stores. Since we allow SF/DF in core registers, we have
8177 to use the intersection of -256..4096 (core) and -1024..1024
8178 (coprocessor). */
8179 && INTVAL (index) > -256
8180 && (INTVAL (index) & 3) == 0);
8181
8182 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8183 {
8184 /* For DImode assume values will usually live in core regs
8185 and only allow LDRD addressing modes. */
8186 if (!TARGET_LDRD || mode != DImode)
8187 return (code == CONST_INT
8188 && INTVAL (index) < 1024
8189 && INTVAL (index) > -1024
8190 && (INTVAL (index) & 3) == 0);
8191 }
8192
8193 /* For quad modes, we restrict the constant offset to be slightly less
8194 than what the instruction format permits. We do this because for
8195 quad mode moves, we will actually decompose them into two separate
8196 double-mode reads or writes. INDEX must therefore be a valid
8197 (double-mode) offset and so should INDEX+8. */
8198 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8199 return (code == CONST_INT
8200 && INTVAL (index) < 1016
8201 && INTVAL (index) > -1024
8202 && (INTVAL (index) & 3) == 0);
8203
8204 /* We have no such constraint on double mode offsets, so we permit the
8205 full range of the instruction format. */
8206 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8207 return (code == CONST_INT
8208 && INTVAL (index) < 1024
8209 && INTVAL (index) > -1024
8210 && (INTVAL (index) & 3) == 0);
8211
8212 if (arm_address_register_rtx_p (index, strict_p)
8213 && (GET_MODE_SIZE (mode) <= 4))
8214 return 1;
8215
8216 if (mode == DImode || mode == DFmode)
8217 {
8218 if (code == CONST_INT)
8219 {
8220 HOST_WIDE_INT val = INTVAL (index);
8221 /* Thumb-2 ldrd only has reg+const addressing modes.
8222 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8223 If vldr is selected it uses arm_coproc_mem_operand. */
8224 if (TARGET_LDRD)
8225 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8226 else
8227 return IN_RANGE (val, -255, 4095 - 4);
8228 }
8229 else
8230 return 0;
8231 }
8232
8233 if (code == MULT)
8234 {
8235 rtx xiop0 = XEXP (index, 0);
8236 rtx xiop1 = XEXP (index, 1);
8237
8238 return ((arm_address_register_rtx_p (xiop0, strict_p)
8239 && thumb2_index_mul_operand (xiop1))
8240 || (arm_address_register_rtx_p (xiop1, strict_p)
8241 && thumb2_index_mul_operand (xiop0)));
8242 }
8243 else if (code == ASHIFT)
8244 {
8245 rtx op = XEXP (index, 1);
8246
8247 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8248 && CONST_INT_P (op)
8249 && INTVAL (op) > 0
8250 && INTVAL (op) <= 3);
8251 }
8252
8253 return (code == CONST_INT
8254 && INTVAL (index) < 4096
8255 && INTVAL (index) > -256);
8256 }
8257
8258 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8259 static int
8260 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8261 {
8262 int regno;
8263
8264 if (!REG_P (x))
8265 return 0;
8266
8267 regno = REGNO (x);
8268
8269 if (strict_p)
8270 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8271
8272 return (regno <= LAST_LO_REGNUM
8273 || regno > LAST_VIRTUAL_REGISTER
8274 || regno == FRAME_POINTER_REGNUM
8275 || (GET_MODE_SIZE (mode) >= 4
8276 && (regno == STACK_POINTER_REGNUM
8277 || regno >= FIRST_PSEUDO_REGISTER
8278 || x == hard_frame_pointer_rtx
8279 || x == arg_pointer_rtx)));
8280 }
8281
8282 /* Return nonzero if x is a legitimate index register. This is the case
8283 for any base register that can access a QImode object. */
8284 inline static int
8285 thumb1_index_register_rtx_p (rtx x, int strict_p)
8286 {
8287 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8288 }
8289
8290 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8291
8292 The AP may be eliminated to either the SP or the FP, so we use the
8293 least common denominator, e.g. SImode, and offsets from 0 to 64.
8294
8295 ??? Verify whether the above is the right approach.
8296
8297 ??? Also, the FP may be eliminated to the SP, so perhaps that
8298 needs special handling also.
8299
8300 ??? Look at how the mips16 port solves this problem. It probably uses
8301 better ways to solve some of these problems.
8302
8303 Although it is not incorrect, we don't accept QImode and HImode
8304 addresses based on the frame pointer or arg pointer until the
8305 reload pass starts. This is so that eliminating such addresses
8306 into stack based ones won't produce impossible code. */
8307 int
8308 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8309 {
8310 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8311 return 0;
8312
8313 /* ??? Not clear if this is right. Experiment. */
8314 if (GET_MODE_SIZE (mode) < 4
8315 && !(reload_in_progress || reload_completed)
8316 && (reg_mentioned_p (frame_pointer_rtx, x)
8317 || reg_mentioned_p (arg_pointer_rtx, x)
8318 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8319 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8320 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8321 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8322 return 0;
8323
8324 /* Accept any base register. SP only in SImode or larger. */
8325 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8326 return 1;
8327
8328 /* This is PC relative data before arm_reorg runs. */
8329 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8330 && GET_CODE (x) == SYMBOL_REF
8331 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8332 return 1;
8333
8334 /* This is PC relative data after arm_reorg runs. */
8335 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8336 && reload_completed
8337 && (GET_CODE (x) == LABEL_REF
8338 || (GET_CODE (x) == CONST
8339 && GET_CODE (XEXP (x, 0)) == PLUS
8340 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8341 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8342 return 1;
8343
8344 /* Post-inc indexing only supported for SImode and larger. */
8345 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8346 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8347 return 1;
8348
8349 else if (GET_CODE (x) == PLUS)
8350 {
8351 /* REG+REG address can be any two index registers. */
8352 /* We disallow FRAME+REG addressing since we know that FRAME
8353 will be replaced with STACK, and SP relative addressing only
8354 permits SP+OFFSET. */
8355 if (GET_MODE_SIZE (mode) <= 4
8356 && XEXP (x, 0) != frame_pointer_rtx
8357 && XEXP (x, 1) != frame_pointer_rtx
8358 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8359 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8360 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8361 return 1;
8362
8363 /* REG+const has 5-7 bit offset for non-SP registers. */
8364 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8365 || XEXP (x, 0) == arg_pointer_rtx)
8366 && CONST_INT_P (XEXP (x, 1))
8367 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8368 return 1;
8369
8370 /* REG+const has 10-bit offset for SP, but only SImode and
8371 larger is supported. */
8372 /* ??? Should probably check for DI/DFmode overflow here
8373 just like GO_IF_LEGITIMATE_OFFSET does. */
8374 else if (REG_P (XEXP (x, 0))
8375 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8376 && GET_MODE_SIZE (mode) >= 4
8377 && CONST_INT_P (XEXP (x, 1))
8378 && INTVAL (XEXP (x, 1)) >= 0
8379 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8380 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8381 return 1;
8382
8383 else if (REG_P (XEXP (x, 0))
8384 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8385 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8386 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8387 && REGNO (XEXP (x, 0))
8388 <= LAST_VIRTUAL_POINTER_REGISTER))
8389 && GET_MODE_SIZE (mode) >= 4
8390 && CONST_INT_P (XEXP (x, 1))
8391 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8392 return 1;
8393 }
8394
8395 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8396 && GET_MODE_SIZE (mode) == 4
8397 && GET_CODE (x) == SYMBOL_REF
8398 && CONSTANT_POOL_ADDRESS_P (x)
8399 && ! (flag_pic
8400 && symbol_mentioned_p (get_pool_constant (x))
8401 && ! pcrel_constant_p (get_pool_constant (x))))
8402 return 1;
8403
8404 return 0;
8405 }
8406
8407 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8408 instruction of mode MODE. */
8409 int
8410 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8411 {
8412 switch (GET_MODE_SIZE (mode))
8413 {
8414 case 1:
8415 return val >= 0 && val < 32;
8416
8417 case 2:
8418 return val >= 0 && val < 64 && (val & 1) == 0;
8419
8420 default:
8421 return (val >= 0
8422 && (val + GET_MODE_SIZE (mode)) <= 128
8423 && (val & 3) == 0);
8424 }
8425 }
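
/* Worked examples for the ranges above (illustration only):
     QImode: offsets 0..31                -> ldrb  rX, [rY, #31]  is the limit
     HImode: even offsets 0..62           -> ldrh  rX, [rY, #62]  is the limit
     SImode: word-aligned offsets 0..124  -> ldr   rX, [rY, #124] is the limit  */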
8426
8427 bool
8428 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8429 {
8430 if (TARGET_ARM)
8431 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8432 else if (TARGET_THUMB2)
8433 return thumb2_legitimate_address_p (mode, x, strict_p);
8434 else /* if (TARGET_THUMB1) */
8435 return thumb1_legitimate_address_p (mode, x, strict_p);
8436 }
8437
8438 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8439
8440 Given an rtx X being reloaded into a reg required to be
8441 in class CLASS, return the class of reg to actually use.
8442 In general this is just CLASS, but for the Thumb core registers and
8443 immediate constants we prefer a LO_REGS class or a subset. */
8444
8445 static reg_class_t
8446 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8447 {
8448 if (TARGET_32BIT)
8449 return rclass;
8450 else
8451 {
8452 if (rclass == GENERAL_REGS)
8453 return LO_REGS;
8454 else
8455 return rclass;
8456 }
8457 }
8458
8459 /* Build the SYMBOL_REF for __tls_get_addr. */
8460
8461 static GTY(()) rtx tls_get_addr_libfunc;
8462
8463 static rtx
8464 get_tls_get_addr (void)
8465 {
8466 if (!tls_get_addr_libfunc)
8467 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8468 return tls_get_addr_libfunc;
8469 }
8470
8471 rtx
8472 arm_load_tp (rtx target)
8473 {
8474 if (!target)
8475 target = gen_reg_rtx (SImode);
8476
8477 if (TARGET_HARD_TP)
8478 {
8479 /* Can return in any reg. */
8480 emit_insn (gen_load_tp_hard (target));
8481 }
8482 else
8483 {
8484 /* Always returned in r0. Immediately copy the result into a pseudo,
8485 otherwise other uses of r0 (e.g. setting up function arguments) may
8486 clobber the value. */
8487
8488 rtx tmp;
8489
8490 emit_insn (gen_load_tp_soft ());
8491
8492 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8493 emit_move_insn (target, tmp);
8494 }
8495 return target;
8496 }
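
/* For reference (illustration only): with a hardware thread register the
   load is a single coprocessor read, otherwise the library helper is used:
       mrc  p15, 0, rX, c13, c0, 3    @ TARGET_HARD_TP
       bl   __aeabi_read_tp           @ software TP, result in r0  */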
8497
8498 static rtx
8499 load_tls_operand (rtx x, rtx reg)
8500 {
8501 rtx tmp;
8502
8503 if (reg == NULL_RTX)
8504 reg = gen_reg_rtx (SImode);
8505
8506 tmp = gen_rtx_CONST (SImode, x);
8507
8508 emit_move_insn (reg, tmp);
8509
8510 return reg;
8511 }
8512
8513 static rtx_insn *
8514 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8515 {
8516 rtx label, labelno, sum;
8517
8518 gcc_assert (reloc != TLS_DESCSEQ);
8519 start_sequence ();
8520
8521 labelno = GEN_INT (pic_labelno++);
8522 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8523 label = gen_rtx_CONST (VOIDmode, label);
8524
8525 sum = gen_rtx_UNSPEC (Pmode,
8526 gen_rtvec (4, x, GEN_INT (reloc), label,
8527 GEN_INT (TARGET_ARM ? 8 : 4)),
8528 UNSPEC_TLS);
8529 reg = load_tls_operand (sum, reg);
8530
8531 if (TARGET_ARM)
8532 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8533 else
8534 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8535
8536 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8537 LCT_PURE, /* LCT_CONST? */
8538 Pmode, reg, Pmode);
8539
8540 rtx_insn *insns = get_insns ();
8541 end_sequence ();
8542
8543 return insns;
8544 }
8545
8546 static rtx
8547 arm_tls_descseq_addr (rtx x, rtx reg)
8548 {
8549 rtx labelno = GEN_INT (pic_labelno++);
8550 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8551 rtx sum = gen_rtx_UNSPEC (Pmode,
8552 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8553 gen_rtx_CONST (VOIDmode, label),
8554 GEN_INT (!TARGET_ARM)),
8555 UNSPEC_TLS);
8556 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8557
8558 emit_insn (gen_tlscall (x, labelno));
8559 if (!reg)
8560 reg = gen_reg_rtx (SImode);
8561 else
8562 gcc_assert (REGNO (reg) != R0_REGNUM);
8563
8564 emit_move_insn (reg, reg0);
8565
8566 return reg;
8567 }
8568
8569 rtx
8570 legitimize_tls_address (rtx x, rtx reg)
8571 {
8572 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8573 rtx_insn *insns;
8574 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8575
8576 switch (model)
8577 {
8578 case TLS_MODEL_GLOBAL_DYNAMIC:
8579 if (TARGET_GNU2_TLS)
8580 {
8581 reg = arm_tls_descseq_addr (x, reg);
8582
8583 tp = arm_load_tp (NULL_RTX);
8584
8585 dest = gen_rtx_PLUS (Pmode, tp, reg);
8586 }
8587 else
8588 {
8589 /* Original scheme */
8590 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8591 dest = gen_reg_rtx (Pmode);
8592 emit_libcall_block (insns, dest, ret, x);
8593 }
8594 return dest;
8595
8596 case TLS_MODEL_LOCAL_DYNAMIC:
8597 if (TARGET_GNU2_TLS)
8598 {
8599 reg = arm_tls_descseq_addr (x, reg);
8600
8601 tp = arm_load_tp (NULL_RTX);
8602
8603 dest = gen_rtx_PLUS (Pmode, tp, reg);
8604 }
8605 else
8606 {
8607 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8608
8609 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8610 share the LDM result with other LD model accesses. */
8611 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8612 UNSPEC_TLS);
8613 dest = gen_reg_rtx (Pmode);
8614 emit_libcall_block (insns, dest, ret, eqv);
8615
8616 /* Load the addend. */
8617 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8618 GEN_INT (TLS_LDO32)),
8619 UNSPEC_TLS);
8620 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8621 dest = gen_rtx_PLUS (Pmode, dest, addend);
8622 }
8623 return dest;
8624
8625 case TLS_MODEL_INITIAL_EXEC:
8626 labelno = GEN_INT (pic_labelno++);
8627 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8628 label = gen_rtx_CONST (VOIDmode, label);
8629 sum = gen_rtx_UNSPEC (Pmode,
8630 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8631 GEN_INT (TARGET_ARM ? 8 : 4)),
8632 UNSPEC_TLS);
8633 reg = load_tls_operand (sum, reg);
8634
8635 if (TARGET_ARM)
8636 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8637 else if (TARGET_THUMB2)
8638 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8639 else
8640 {
8641 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8642 emit_move_insn (reg, gen_const_mem (SImode, reg));
8643 }
8644
8645 tp = arm_load_tp (NULL_RTX);
8646
8647 return gen_rtx_PLUS (Pmode, tp, reg);
8648
8649 case TLS_MODEL_LOCAL_EXEC:
8650 tp = arm_load_tp (NULL_RTX);
8651
8652 reg = gen_rtx_UNSPEC (Pmode,
8653 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8654 UNSPEC_TLS);
8655 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8656
8657 return gen_rtx_PLUS (Pmode, tp, reg);
8658
8659 default:
8660 abort ();
8661 }
8662 }
8663
8664 /* Try machine-dependent ways of modifying an illegitimate address
8665 to be legitimate. If we find one, return the new, valid address. */
8666 rtx
8667 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8668 {
8669 if (arm_tls_referenced_p (x))
8670 {
8671 rtx addend = NULL;
8672
8673 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8674 {
8675 addend = XEXP (XEXP (x, 0), 1);
8676 x = XEXP (XEXP (x, 0), 0);
8677 }
8678
8679 if (GET_CODE (x) != SYMBOL_REF)
8680 return x;
8681
8682 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8683
8684 x = legitimize_tls_address (x, NULL_RTX);
8685
8686 if (addend)
8687 {
8688 x = gen_rtx_PLUS (SImode, x, addend);
8689 orig_x = x;
8690 }
8691 else
8692 return x;
8693 }
8694
8695 if (!TARGET_ARM)
8696 {
8697 /* TODO: legitimize_address for Thumb2. */
8698 if (TARGET_THUMB2)
8699 return x;
8700 return thumb_legitimize_address (x, orig_x, mode);
8701 }
8702
8703 if (GET_CODE (x) == PLUS)
8704 {
8705 rtx xop0 = XEXP (x, 0);
8706 rtx xop1 = XEXP (x, 1);
8707
8708 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8709 xop0 = force_reg (SImode, xop0);
8710
8711 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8712 && !symbol_mentioned_p (xop1))
8713 xop1 = force_reg (SImode, xop1);
8714
8715 if (ARM_BASE_REGISTER_RTX_P (xop0)
8716 && CONST_INT_P (xop1))
8717 {
8718 HOST_WIDE_INT n, low_n;
8719 rtx base_reg, val;
8720 n = INTVAL (xop1);
8721
8722 /* VFP addressing modes actually allow greater offsets, but for
8723 now we just stick with the lowest common denominator. */
8724 if (mode == DImode || mode == DFmode)
8725 {
8726 low_n = n & 0x0f;
8727 n &= ~0x0f;
8728 if (low_n > 4)
8729 {
8730 n += 16;
8731 low_n -= 16;
8732 }
8733 }
8734 else
8735 {
8736 low_n = ((mode) == TImode ? 0
8737 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8738 n -= low_n;
8739 }
8740
8741 base_reg = gen_reg_rtx (SImode);
8742 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8743 emit_move_insn (base_reg, val);
8744 x = plus_constant (Pmode, base_reg, low_n);
8745 }
8746 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8747 x = gen_rtx_PLUS (SImode, xop0, xop1);
8748 }
8749
8750 /* XXX We don't allow MINUS any more -- see comment in
8751 arm_legitimate_address_outer_p (). */
8752 else if (GET_CODE (x) == MINUS)
8753 {
8754 rtx xop0 = XEXP (x, 0);
8755 rtx xop1 = XEXP (x, 1);
8756
8757 if (CONSTANT_P (xop0))
8758 xop0 = force_reg (SImode, xop0);
8759
8760 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8761 xop1 = force_reg (SImode, xop1);
8762
8763 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8764 x = gen_rtx_MINUS (SImode, xop0, xop1);
8765 }
8766
8767 /* Make sure to take full advantage of the pre-indexed addressing mode
8768 with absolute addresses, which often allows the base register to be
8769 factorized across multiple adjacent memory references and might even
8770 allow the minipool to be avoided entirely. */
8771 else if (CONST_INT_P (x) && optimize > 0)
8772 {
8773 unsigned int bits;
8774 HOST_WIDE_INT mask, base, index;
8775 rtx base_reg;
8776
8777 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8778 use an 8-bit index. So let's use a 12-bit index for SImode only and
8779 hope that arm_gen_constant will enable ldrb to use more bits. */
8780 bits = (mode == SImode) ? 12 : 8;
8781 mask = (1 << bits) - 1;
8782 base = INTVAL (x) & ~mask;
8783 index = INTVAL (x) & mask;
8784 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8785 {
8786 /* It'll most probably be more efficient to generate the base
8787 with more bits set and use a negative index instead. */
8788 base |= mask;
8789 index -= mask;
8790 }
8791 base_reg = force_reg (SImode, GEN_INT (base));
8792 x = plus_constant (Pmode, base_reg, index);
8793 }
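/* Worked example (illustrative values, to show the splitting above): for an
   SImode access to the absolute address 0x0ffff004 we get bits = 12,
   mask = 0xfff, base = 0x0ffff000 and index = 0x004.  bit_count (base) is 16,
   which exceeds (32 - bits) / 2 = 10, so the code switches to a negative
   index: base becomes 0x0fffffff and index becomes 0x004 - 0xfff = -4091,
   and 0x0fffffff + (-4091) still equals 0x0ffff004.  The payoff is that
   ~0x0fffffff is 0xf0000000, a valid rotated immediate, so the biased base
   can be built with a single mvn, whereas 0x0ffff000 needs two
   instructions.  */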
8794
8795 if (flag_pic)
8796 {
8797 /* We need to find and carefully transform any SYMBOL and LABEL
8798 references; so go back to the original address expression. */
8799 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8800 false /*compute_now*/);
8801
8802 if (new_x != orig_x)
8803 x = new_x;
8804 }
8805
8806 return x;
8807 }
8808
8809
8810 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8811 to be legitimate. If we find one, return the new, valid address. */
8812 rtx
8813 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8814 {
8815 if (GET_CODE (x) == PLUS
8816 && CONST_INT_P (XEXP (x, 1))
8817 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8818 || INTVAL (XEXP (x, 1)) < 0))
8819 {
8820 rtx xop0 = XEXP (x, 0);
8821 rtx xop1 = XEXP (x, 1);
8822 HOST_WIDE_INT offset = INTVAL (xop1);
8823
8824 /* Try to fold the offset into a biasing of the base register and
8825 then offsetting that. Don't do this when optimizing for space
8826 since it can cause too many CSEs. */
8827 if (optimize_size && offset >= 0
8828 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8829 {
8830 HOST_WIDE_INT delta;
8831
8832 if (offset >= 256)
8833 delta = offset - (256 - GET_MODE_SIZE (mode));
8834 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8835 delta = 31 * GET_MODE_SIZE (mode);
8836 else
8837 delta = offset & (~31 * GET_MODE_SIZE (mode));
8838
8839 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8840 NULL_RTX);
8841 x = plus_constant (Pmode, xop0, delta);
8842 }
8843 else if (offset < 0 && offset > -256)
8844 /* Small negative offsets are best done with a subtract before the
8845 dereference; forcing these into a register normally takes two
8846 instructions. */
8847 x = force_operand (x, NULL_RTX);
8848 else
8849 {
8850 /* For the remaining cases, force the constant into a register. */
8851 xop1 = force_reg (SImode, xop1);
8852 x = gen_rtx_PLUS (SImode, xop0, xop1);
8853 }
8854 }
8855 else if (GET_CODE (x) == PLUS
8856 && s_register_operand (XEXP (x, 1), SImode)
8857 && !s_register_operand (XEXP (x, 0), SImode))
8858 {
8859 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8860
8861 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8862 }
8863
8864 if (flag_pic)
8865 {
8866 /* We need to find and carefully transform any SYMBOL and LABEL
8867 references; so go back to the original address expression. */
8868 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8869 false /*compute_now*/);
8870
8871 if (new_x != orig_x)
8872 x = new_x;
8873 }
8874
8875 return x;
8876 }
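/* Worked example of the offset folding above (illustrative, assuming the
   folding branch is taken and an SImode access, whose Thumb-1 load can
   encode offsets 0..124 = 31 * GET_MODE_SIZE):
     offset = 300: 300 >= 256, so delta = 300 - (256 - 4) = 48; the base is
       biased by 300 - 48 = 252 (one add of an 8-bit immediate) and the
       load then uses offset 48.
     offset = 130: 130 < 32 * 4 + 8 = 136, so delta = 31 * 4 = 124; the
       base is biased by 6 and the load uses offset 124.
   In both cases the bias plus the residual offset equals the original
   offset and each piece fits its encoding.  */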
8877
8878 /* Return TRUE if X contains any TLS symbol references. */
8879
8880 bool
8881 arm_tls_referenced_p (rtx x)
8882 {
8883 if (! TARGET_HAVE_TLS)
8884 return false;
8885
8886 subrtx_iterator::array_type array;
8887 FOR_EACH_SUBRTX (iter, array, x, ALL)
8888 {
8889 const_rtx x = *iter;
8890 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8891 {
8892 /* ARM currently does not provide relocations to encode TLS variables
8893 into AArch32 instructions, only into data, so there is currently no
8894 way to implement these if the literal pool is disabled. */
8895 if (arm_disable_literal_pool)
8896 sorry ("accessing thread-local storage is not currently supported "
8897 "with %<-mpure-code%> or %<-mslow-flash-data%>");
8898
8899 return true;
8900 }
8901
8902 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8903 TLS offsets, not real symbol references. */
8904 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8905 iter.skip_subrtxes ();
8906 }
8907 return false;
8908 }
8909
8910 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8911
8912 On the ARM, allow any integer (invalid ones are removed later by insn
8913 patterns), nice doubles and symbol_refs which refer to the function's
8914 constant pool XXX.
8915
8916 When generating pic allow anything. */
8917
8918 static bool
8919 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8920 {
8921 return flag_pic || !label_mentioned_p (x);
8922 }
8923
8924 static bool
8925 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8926 {
8927 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8928 RTXs. These RTXs must therefore be allowed for Thumb-1 so that the
8929 result is valid when run for ARMv8-M Baseline or later. */
8930 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8931 x = XEXP (x, 0);
8932
8933 return (CONST_INT_P (x)
8934 || CONST_DOUBLE_P (x)
8935 || CONSTANT_ADDRESS_P (x)
8936 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8937 || flag_pic);
8938 }
8939
8940 static bool
8941 arm_legitimate_constant_p (machine_mode mode, rtx x)
8942 {
8943 return (!arm_cannot_force_const_mem (mode, x)
8944 && (TARGET_32BIT
8945 ? arm_legitimate_constant_p_1 (mode, x)
8946 : thumb_legitimate_constant_p (mode, x)));
8947 }
8948
8949 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8950
8951 static bool
8952 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8953 {
8954 rtx base, offset;
8955 split_const (x, &base, &offset);
8956
8957 if (SYMBOL_REF_P (base))
8958 {
8959 /* Function symbols cannot have an offset due to the Thumb bit. */
8960 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
8961 && INTVAL (offset) != 0)
8962 return true;
8963
8964 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
8965 && !offset_within_block_p (base, INTVAL (offset)))
8966 return true;
8967 }
8968 return arm_tls_referenced_p (x);
8969 }
8970 \f
8971 #define REG_OR_SUBREG_REG(X) \
8972 (REG_P (X) \
8973 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8974
8975 #define REG_OR_SUBREG_RTX(X) \
8976 (REG_P (X) ? (X) : SUBREG_REG (X))
8977
8978 static inline int
8979 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8980 {
8981 machine_mode mode = GET_MODE (x);
8982 int total, words;
8983
8984 switch (code)
8985 {
8986 case ASHIFT:
8987 case ASHIFTRT:
8988 case LSHIFTRT:
8989 case ROTATERT:
8990 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8991
8992 case PLUS:
8993 case MINUS:
8994 case COMPARE:
8995 case NEG:
8996 case NOT:
8997 return COSTS_N_INSNS (1);
8998
8999 case MULT:
9000 if (arm_arch6m && arm_m_profile_small_mul)
9001 return COSTS_N_INSNS (32);
9002
9003 if (CONST_INT_P (XEXP (x, 1)))
9004 {
9005 int cycles = 0;
9006 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9007
9008 while (i)
9009 {
9010 i >>= 2;
9011 cycles++;
9012 }
9013 return COSTS_N_INSNS (2) + cycles;
9014 }
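/* Illustrative note on the constant-multiplier loop above: it charges one
   cycle per two bits of the multiplier value.  For example, i = 0x55 takes
   four iterations (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), giving an estimate of
   COSTS_N_INSNS (2) + 4.  */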
9015 return COSTS_N_INSNS (1) + 16;
9016
9017 case SET:
9018 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9019 the mode. */
9020 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9021 return (COSTS_N_INSNS (words)
9022 + 4 * ((MEM_P (SET_SRC (x)))
9023 + MEM_P (SET_DEST (x))));
9024
9025 case CONST_INT:
9026 if (outer == SET)
9027 {
9028 if (UINTVAL (x) < 256
9029 /* 16-bit constant. */
9030 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9031 return 0;
9032 if (thumb_shiftable_const (INTVAL (x)))
9033 return COSTS_N_INSNS (2);
9034 return COSTS_N_INSNS (3);
9035 }
9036 else if ((outer == PLUS || outer == COMPARE)
9037 && INTVAL (x) < 256 && INTVAL (x) > -256)
9038 return 0;
9039 else if ((outer == IOR || outer == XOR || outer == AND)
9040 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9041 return COSTS_N_INSNS (1);
9042 else if (outer == AND)
9043 {
9044 int i;
9045 /* This duplicates the tests in the andsi3 expander. */
9046 for (i = 9; i <= 31; i++)
9047 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9048 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9049 return COSTS_N_INSNS (2);
9050 }
9051 else if (outer == ASHIFT || outer == ASHIFTRT
9052 || outer == LSHIFTRT)
9053 return 0;
9054 return COSTS_N_INSNS (2);
9055
9056 case CONST:
9057 case CONST_DOUBLE:
9058 case LABEL_REF:
9059 case SYMBOL_REF:
9060 return COSTS_N_INSNS (3);
9061
9062 case UDIV:
9063 case UMOD:
9064 case DIV:
9065 case MOD:
9066 return 100;
9067
9068 case TRUNCATE:
9069 return 99;
9070
9071 case AND:
9072 case XOR:
9073 case IOR:
9074 /* XXX guess. */
9075 return 8;
9076
9077 case MEM:
9078 /* XXX another guess. */
9079 /* Memory costs quite a lot for the first word, but subsequent words
9080 load at the equivalent of a single insn each. */
9081 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9082 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9083 ? 4 : 0));
9084
9085 case IF_THEN_ELSE:
9086 /* XXX a guess. */
9087 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9088 return 14;
9089 return 2;
9090
9091 case SIGN_EXTEND:
9092 case ZERO_EXTEND:
9093 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9094 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9095
9096 if (mode == SImode)
9097 return total;
9098
9099 if (arm_arch6)
9100 return total + COSTS_N_INSNS (1);
9101
9102 /* Assume a two-shift sequence. Increase the cost slightly so
9103 we prefer actual shifts over an extend operation. */
9104 return total + 1 + COSTS_N_INSNS (2);
9105
9106 default:
9107 return 99;
9108 }
9109 }
9110
9111 /* Estimate the size cost of Thumb-1 instructions.
9112 For now most of the code is copied from thumb1_rtx_costs. We need
9113 finer-grained tuning when we have more related test cases. */
9114 static inline int
9115 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9116 {
9117 machine_mode mode = GET_MODE (x);
9118 int words, cost;
9119
9120 switch (code)
9121 {
9122 case ASHIFT:
9123 case ASHIFTRT:
9124 case LSHIFTRT:
9125 case ROTATERT:
9126 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9127
9128 case PLUS:
9129 case MINUS:
9130 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9131 shiftsub1 patterns defined by RTL expansion, especially for the
9132 expansion of multiplication. */
9133 if ((GET_CODE (XEXP (x, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9135 || (GET_CODE (XEXP (x, 1)) == MULT
9136 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9137 return COSTS_N_INSNS (2);
9138 /* Fall through. */
9139 case COMPARE:
9140 case NEG:
9141 case NOT:
9142 return COSTS_N_INSNS (1);
9143
9144 case MULT:
9145 if (CONST_INT_P (XEXP (x, 1)))
9146 {
9147 /* The Thumb-1 mul instruction can't operate on a constant. We must
9148 load it into a register first. */
9149 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9150 /* For the targets which have a very small and high-latency multiply
9151 unit, we prefer to synthesize the mult with up to 5 instructions,
9152 giving a good balance between size and performance. */
9153 if (arm_arch6m && arm_m_profile_small_mul)
9154 return COSTS_N_INSNS (5);
9155 else
9156 return COSTS_N_INSNS (1) + const_size;
9157 }
9158 return COSTS_N_INSNS (1);
9159
9160 case SET:
9161 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9162 the mode. */
9163 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9164 cost = COSTS_N_INSNS (words);
9165 if (satisfies_constraint_J (SET_SRC (x))
9166 || satisfies_constraint_K (SET_SRC (x))
9167 /* Too big an immediate for a 2-byte mov, using MOVT. */
9168 || (CONST_INT_P (SET_SRC (x))
9169 && UINTVAL (SET_SRC (x)) >= 256
9170 && TARGET_HAVE_MOVT
9171 && satisfies_constraint_j (SET_SRC (x)))
9172 /* thumb1_movdi_insn. */
9173 || ((words > 1) && MEM_P (SET_SRC (x))))
9174 cost += COSTS_N_INSNS (1);
9175 return cost;
9176
9177 case CONST_INT:
9178 if (outer == SET)
9179 {
9180 if (UINTVAL (x) < 256)
9181 return COSTS_N_INSNS (1);
9182 /* movw is 4 bytes long. */
9183 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9184 return COSTS_N_INSNS (2);
9185 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9186 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9187 return COSTS_N_INSNS (2);
9188 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9189 if (thumb_shiftable_const (INTVAL (x)))
9190 return COSTS_N_INSNS (2);
9191 return COSTS_N_INSNS (3);
9192 }
9193 else if ((outer == PLUS || outer == COMPARE)
9194 && INTVAL (x) < 256 && INTVAL (x) > -256)
9195 return 0;
9196 else if ((outer == IOR || outer == XOR || outer == AND)
9197 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9198 return COSTS_N_INSNS (1);
9199 else if (outer == AND)
9200 {
9201 int i;
9202 /* This duplicates the tests in the andsi3 expander. */
9203 for (i = 9; i <= 31; i++)
9204 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9205 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9206 return COSTS_N_INSNS (2);
9207 }
9208 else if (outer == ASHIFT || outer == ASHIFTRT
9209 || outer == LSHIFTRT)
9210 return 0;
9211 return COSTS_N_INSNS (2);
9212
9213 case CONST:
9214 case CONST_DOUBLE:
9215 case LABEL_REF:
9216 case SYMBOL_REF:
9217 return COSTS_N_INSNS (3);
9218
9219 case UDIV:
9220 case UMOD:
9221 case DIV:
9222 case MOD:
9223 return 100;
9224
9225 case TRUNCATE:
9226 return 99;
9227
9228 case AND:
9229 case XOR:
9230 case IOR:
9231 return COSTS_N_INSNS (1);
9232
9233 case MEM:
9234 return (COSTS_N_INSNS (1)
9235 + COSTS_N_INSNS (1)
9236 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9237 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9238 ? COSTS_N_INSNS (1) : 0));
9239
9240 case IF_THEN_ELSE:
9241 /* XXX a guess. */
9242 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9243 return 14;
9244 return 2;
9245
9246 case ZERO_EXTEND:
9247 /* XXX still guessing. */
9248 switch (GET_MODE (XEXP (x, 0)))
9249 {
9250 case E_QImode:
9251 return (1 + (mode == DImode ? 4 : 0)
9252 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9253
9254 case E_HImode:
9255 return (4 + (mode == DImode ? 4 : 0)
9256 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9257
9258 case E_SImode:
9259 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9260
9261 default:
9262 return 99;
9263 }
9264
9265 default:
9266 return 99;
9267 }
9268 }
9269
9270 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9271 operand, then return the operand that is being shifted. If the shift
9272 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9273 Return NULL if OP is not a shifter operand. */
9274 static rtx
9275 shifter_op_p (rtx op, rtx *shift_reg)
9276 {
9277 enum rtx_code code = GET_CODE (op);
9278
9279 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9280 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9281 return XEXP (op, 0);
9282 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9283 return XEXP (op, 0);
9284 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9285 || code == ASHIFTRT)
9286 {
9287 if (!CONST_INT_P (XEXP (op, 1)))
9288 *shift_reg = XEXP (op, 1);
9289 return XEXP (op, 0);
9290 }
9291
9292 return NULL;
9293 }
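/* Illustrative example of how shifter_op_p is used (r1 and r2 are
   placeholder registers): for (mult (reg:SI r1) (const_int 4)),
   exact_log2 (4) == 2 > 0, so the multiply is costed like a left shift by
   two and (reg:SI r1) is returned with *SHIFT_REG untouched.  For
   (ashift (reg:SI r1) (reg:SI r2)) the shift amount is not a constant, so
   *SHIFT_REG is set to (reg:SI r2) and (reg:SI r1) is returned.  */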
9294
9295 static bool
9296 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9297 {
9298 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9299 rtx_code code = GET_CODE (x);
9300 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9301
9302 switch (XINT (x, 1))
9303 {
9304 case UNSPEC_UNALIGNED_LOAD:
9305 /* We can only do unaligned loads into the integer unit, and we can't
9306 use LDM or LDRD. */
9307 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9308 if (speed_p)
9309 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9310 + extra_cost->ldst.load_unaligned);
9311
9312 #ifdef NOT_YET
9313 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9314 ADDR_SPACE_GENERIC, speed_p);
9315 #endif
9316 return true;
9317
9318 case UNSPEC_UNALIGNED_STORE:
9319 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9320 if (speed_p)
9321 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9322 + extra_cost->ldst.store_unaligned);
9323
9324 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9325 #ifdef NOT_YET
9326 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9327 ADDR_SPACE_GENERIC, speed_p);
9328 #endif
9329 return true;
9330
9331 case UNSPEC_VRINTZ:
9332 case UNSPEC_VRINTP:
9333 case UNSPEC_VRINTM:
9334 case UNSPEC_VRINTR:
9335 case UNSPEC_VRINTX:
9336 case UNSPEC_VRINTA:
9337 if (speed_p)
9338 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9339
9340 return true;
9341 default:
9342 *cost = COSTS_N_INSNS (2);
9343 break;
9344 }
9345 return true;
9346 }
9347
9348 /* Cost of a libcall. We assume one insn per argument, an amount for the
9349 call (one insn for -Os) and then one for processing the result. */
9350 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
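/* Worked example of the macro above (illustrative): for a two-argument
   libcall, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (2 + 2)
   = COSTS_N_INSNS (4) when speed_p is false (size) and to
   COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20) when speed_p is true.  */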
9351
9352 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9353 do \
9354 { \
9355 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9356 if (shift_op != NULL \
9357 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9358 { \
9359 if (shift_reg) \
9360 { \
9361 if (speed_p) \
9362 *cost += extra_cost->alu.arith_shift_reg; \
9363 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9364 ASHIFT, 1, speed_p); \
9365 } \
9366 else if (speed_p) \
9367 *cost += extra_cost->alu.arith_shift; \
9368 \
9369 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9370 ASHIFT, 0, speed_p) \
9371 + rtx_cost (XEXP (x, 1 - IDX), \
9372 GET_MODE (shift_op), \
9373 OP, 1, speed_p)); \
9374 return true; \
9375 } \
9376 } \
9377 while (0)
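/* Usage sketch for the macro above (illustrative):
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks whether XEXP (x, 0) is a left
   shift (including a multiply by a power of two); if so it adds the
   arith+shift cost, the costs of the shifted operand and of the other
   operand XEXP (x, 1 - IDX), and returns true from the enclosing cost
   function.  The PLUS case below expands it with IDX 0; the non-commutative
   MINUS case expands it with both IDX 0 and IDX 1.  */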
9378
9379 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9380 considering the costs of the addressing mode and memory access
9381 separately. */
9382 static bool
9383 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9384 int *cost, bool speed_p)
9385 {
9386 machine_mode mode = GET_MODE (x);
9387
9388 *cost = COSTS_N_INSNS (1);
9389
9390 if (flag_pic
9391 && GET_CODE (XEXP (x, 0)) == PLUS
9392 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9393 /* This will be split into two instructions. Add the cost of the
9394 additional instruction here. The cost of the memory access is computed
9395 below. See arm.md:calculate_pic_address. */
9396 *cost += COSTS_N_INSNS (1);
9397
9398 /* Calculate cost of the addressing mode. */
9399 if (speed_p)
9400 {
9401 arm_addr_mode_op op_type;
9402 switch (GET_CODE (XEXP (x, 0)))
9403 {
9404 default:
9405 case REG:
9406 op_type = AMO_DEFAULT;
9407 break;
9408 case MINUS:
9409 /* MINUS does not appear in RTL, but the architecture supports it,
9410 so handle this case defensively. */
9411 /* fall through */
9412 case PLUS:
9413 op_type = AMO_NO_WB;
9414 break;
9415 case PRE_INC:
9416 case PRE_DEC:
9417 case POST_INC:
9418 case POST_DEC:
9419 case PRE_MODIFY:
9420 case POST_MODIFY:
9421 op_type = AMO_WB;
9422 break;
9423 }
9424
9425 if (VECTOR_MODE_P (mode))
9426 *cost += current_tune->addr_mode_costs->vector[op_type];
9427 else if (FLOAT_MODE_P (mode))
9428 *cost += current_tune->addr_mode_costs->fp[op_type];
9429 else
9430 *cost += current_tune->addr_mode_costs->integer[op_type];
9431 }
9432
9433 /* Calculate cost of memory access. */
9434 if (speed_p)
9435 {
9436 if (FLOAT_MODE_P (mode))
9437 {
9438 if (GET_MODE_SIZE (mode) == 8)
9439 *cost += extra_cost->ldst.loadd;
9440 else
9441 *cost += extra_cost->ldst.loadf;
9442 }
9443 else if (VECTOR_MODE_P (mode))
9444 *cost += extra_cost->ldst.loadv;
9445 else
9446 {
9447 /* Integer modes */
9448 if (GET_MODE_SIZE (mode) == 8)
9449 *cost += extra_cost->ldst.ldrd;
9450 else
9451 *cost += extra_cost->ldst.load;
9452 }
9453 }
9454
9455 return true;
9456 }
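/* Illustration of the address classification above (example addresses
   only): a plain register address such as (mem (reg r0)) is costed as
   AMO_DEFAULT, a base-plus-offset or base-plus-index form such as
   (mem (plus (reg r0) (const_int 8))) as AMO_NO_WB, and any auto-modify
   form such as (mem (post_inc (reg r0))) as AMO_WB; the table row is then
   picked by whether the mode is vector, floating-point or integer.  */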
9457
9458 /* RTX costs. Make an estimate of the cost of executing the operation
9459 X, which is contained within an operation with code OUTER_CODE.
9460 SPEED_P indicates whether the cost desired is the performance cost,
9461 or the size cost. The estimate is stored in COST and the return
9462 value is TRUE if the cost calculation is final, or FALSE if the
9463 caller should recurse through the operands of X to add additional
9464 costs.
9465
9466 We currently make no attempt to model the size savings of Thumb-2
9467 16-bit instructions. At the normal points in compilation where
9468 this code is called we have no measure of whether the condition
9469 flags are live or not, and thus no realistic way to determine what
9470 the size will eventually be. */
9471 static bool
9472 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9473 const struct cpu_cost_table *extra_cost,
9474 int *cost, bool speed_p)
9475 {
9476 machine_mode mode = GET_MODE (x);
9477
9478 *cost = COSTS_N_INSNS (1);
9479
9480 if (TARGET_THUMB1)
9481 {
9482 if (speed_p)
9483 *cost = thumb1_rtx_costs (x, code, outer_code);
9484 else
9485 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9486 return true;
9487 }
9488
9489 switch (code)
9490 {
9491 case SET:
9492 *cost = 0;
9493 /* SET RTXs don't have a mode so we get it from the destination. */
9494 mode = GET_MODE (SET_DEST (x));
9495
9496 if (REG_P (SET_SRC (x))
9497 && REG_P (SET_DEST (x)))
9498 {
9499 /* Assume that most copies can be done with a single insn,
9500 unless we don't have HW FP, in which case everything
9501 larger than word mode will require two insns. */
9502 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9503 && GET_MODE_SIZE (mode) > 4)
9504 || mode == DImode)
9505 ? 2 : 1);
9506 /* Conditional register moves can be encoded
9507 in 16 bits in Thumb mode. */
9508 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9509 *cost >>= 1;
9510
9511 return true;
9512 }
9513
9514 if (CONST_INT_P (SET_SRC (x)))
9515 {
9516 /* Handle CONST_INT here, since the value doesn't have a mode
9517 and we would otherwise be unable to work out the true cost. */
9518 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9519 0, speed_p);
9520 outer_code = SET;
9521 /* Slightly lower the cost of setting a core reg to a constant.
9522 This helps break up chains and allows for better scheduling. */
9523 if (REG_P (SET_DEST (x))
9524 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9525 *cost -= 1;
9526 x = SET_SRC (x);
9527 /* Immediate moves with an immediate in the range [0, 255] can be
9528 encoded in 16 bits in Thumb mode. */
9529 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9530 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9531 *cost >>= 1;
9532 goto const_int_cost;
9533 }
9534
9535 return false;
9536
9537 case MEM:
9538 return arm_mem_costs (x, extra_cost, cost, speed_p);
9539
9540 case PARALLEL:
9541 {
9542 /* Calculations of LDM costs are complex. We assume an initial cost
9543 (ldm_1st) which will load the number of registers mentioned in
9544 ldm_regs_per_insn_1st registers; then each additional
9545 ldm_regs_per_insn_subsequent registers cost one more insn. The
9546 formula for N regs is thus:
9547
9548 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9549 + ldm_regs_per_insn_subsequent - 1)
9550 / ldm_regs_per_insn_subsequent).
9551
9552 Additional costs may also be added for addressing. A similar
9553 formula is used for STM. */
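/* Worked example with hypothetical tuning values (illustration only): with
   ldm_regs_per_insn_1st = 3 and ldm_regs_per_insn_subsequent = 2, an LDM
   of N = 6 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2),
   i.e. one extra insn-equivalent for every two registers beyond the first
   group.  */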
9554
9555 bool is_ldm = load_multiple_operation (x, SImode);
9556 bool is_stm = store_multiple_operation (x, SImode);
9557
9558 if (is_ldm || is_stm)
9559 {
9560 if (speed_p)
9561 {
9562 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9563 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9564 ? extra_cost->ldst.ldm_regs_per_insn_1st
9565 : extra_cost->ldst.stm_regs_per_insn_1st;
9566 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9567 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9568 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9569
9570 *cost += regs_per_insn_1st
9571 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9572 + regs_per_insn_sub - 1)
9573 / regs_per_insn_sub);
9574 return true;
9575 }
9576
9577 }
9578 return false;
9579 }
9580 case DIV:
9581 case UDIV:
9582 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9583 && (mode == SFmode || !TARGET_VFP_SINGLE))
9584 *cost += COSTS_N_INSNS (speed_p
9585 ? extra_cost->fp[mode != SFmode].div : 0);
9586 else if (mode == SImode && TARGET_IDIV)
9587 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9588 else
9589 *cost = LIBCALL_COST (2);
9590
9591 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9592 are possible, udiv is preferred. */
9593 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9594 return false; /* All arguments must be in registers. */
9595
9596 case MOD:
9597 /* MOD by a power of 2 can be expanded as:
9598 rsbs r1, r0, #0
9599 and r0, r0, #(n - 1)
9600 and r1, r1, #(n - 1)
9601 rsbpl r0, r1, #0. */
9602 if (CONST_INT_P (XEXP (x, 1))
9603 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9604 && mode == SImode)
9605 {
9606 *cost += COSTS_N_INSNS (3);
9607
9608 if (speed_p)
9609 *cost += 2 * extra_cost->alu.logical
9610 + extra_cost->alu.arith;
9611 return true;
9612 }
9613
9614 /* Fall-through. */
9615 case UMOD:
9616 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9617 are possible, udiv is preferred. */
9618 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9619 return false; /* All arguments must be in registers. */
9620
9621 case ROTATE:
9622 if (mode == SImode && REG_P (XEXP (x, 1)))
9623 {
9624 *cost += (COSTS_N_INSNS (1)
9625 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9626 if (speed_p)
9627 *cost += extra_cost->alu.shift_reg;
9628 return true;
9629 }
9630 /* Fall through */
9631 case ROTATERT:
9632 case ASHIFT:
9633 case LSHIFTRT:
9634 case ASHIFTRT:
9635 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9636 {
9637 *cost += (COSTS_N_INSNS (2)
9638 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9639 if (speed_p)
9640 *cost += 2 * extra_cost->alu.shift;
9641 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9642 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9643 *cost += 1;
9644 return true;
9645 }
9646 else if (mode == SImode)
9647 {
9648 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9649 /* Slightly disparage register shifts at -Os, but not by much. */
9650 if (!CONST_INT_P (XEXP (x, 1)))
9651 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9652 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9653 return true;
9654 }
9655 else if (GET_MODE_CLASS (mode) == MODE_INT
9656 && GET_MODE_SIZE (mode) < 4)
9657 {
9658 if (code == ASHIFT)
9659 {
9660 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9661 /* Slightly disparage register shifts at -Os, but not by
9662 much. */
9663 if (!CONST_INT_P (XEXP (x, 1)))
9664 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9665 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9666 }
9667 else if (code == LSHIFTRT || code == ASHIFTRT)
9668 {
9669 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9670 {
9671 /* Can use SBFX/UBFX. */
9672 if (speed_p)
9673 *cost += extra_cost->alu.bfx;
9674 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9675 }
9676 else
9677 {
9678 *cost += COSTS_N_INSNS (1);
9679 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9680 if (speed_p)
9681 {
9682 if (CONST_INT_P (XEXP (x, 1)))
9683 *cost += 2 * extra_cost->alu.shift;
9684 else
9685 *cost += (extra_cost->alu.shift
9686 + extra_cost->alu.shift_reg);
9687 }
9688 else
9689 /* Slightly disparage register shifts. */
9690 *cost += !CONST_INT_P (XEXP (x, 1));
9691 }
9692 }
9693 else /* Rotates. */
9694 {
9695 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9696 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9697 if (speed_p)
9698 {
9699 if (CONST_INT_P (XEXP (x, 1)))
9700 *cost += (2 * extra_cost->alu.shift
9701 + extra_cost->alu.log_shift);
9702 else
9703 *cost += (extra_cost->alu.shift
9704 + extra_cost->alu.shift_reg
9705 + extra_cost->alu.log_shift_reg);
9706 }
9707 }
9708 return true;
9709 }
9710
9711 *cost = LIBCALL_COST (2);
9712 return false;
9713
9714 case BSWAP:
9715 if (arm_arch6)
9716 {
9717 if (mode == SImode)
9718 {
9719 if (speed_p)
9720 *cost += extra_cost->alu.rev;
9721
9722 return false;
9723 }
9724 }
9725 else
9726 {
9727 /* No rev instruction available. Look at arm_legacy_rev
9728 and thumb_legacy_rev for the form of RTL used then. */
9729 if (TARGET_THUMB)
9730 {
9731 *cost += COSTS_N_INSNS (9);
9732
9733 if (speed_p)
9734 {
9735 *cost += 6 * extra_cost->alu.shift;
9736 *cost += 3 * extra_cost->alu.logical;
9737 }
9738 }
9739 else
9740 {
9741 *cost += COSTS_N_INSNS (4);
9742
9743 if (speed_p)
9744 {
9745 *cost += 2 * extra_cost->alu.shift;
9746 *cost += extra_cost->alu.arith_shift;
9747 *cost += 2 * extra_cost->alu.logical;
9748 }
9749 }
9750 return true;
9751 }
9752 return false;
9753
9754 case MINUS:
9755 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9756 && (mode == SFmode || !TARGET_VFP_SINGLE))
9757 {
9758 if (GET_CODE (XEXP (x, 0)) == MULT
9759 || GET_CODE (XEXP (x, 1)) == MULT)
9760 {
9761 rtx mul_op0, mul_op1, sub_op;
9762
9763 if (speed_p)
9764 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9765
9766 if (GET_CODE (XEXP (x, 0)) == MULT)
9767 {
9768 mul_op0 = XEXP (XEXP (x, 0), 0);
9769 mul_op1 = XEXP (XEXP (x, 0), 1);
9770 sub_op = XEXP (x, 1);
9771 }
9772 else
9773 {
9774 mul_op0 = XEXP (XEXP (x, 1), 0);
9775 mul_op1 = XEXP (XEXP (x, 1), 1);
9776 sub_op = XEXP (x, 0);
9777 }
9778
9779 /* The first operand of the multiply may be optionally
9780 negated. */
9781 if (GET_CODE (mul_op0) == NEG)
9782 mul_op0 = XEXP (mul_op0, 0);
9783
9784 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9785 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9786 + rtx_cost (sub_op, mode, code, 0, speed_p));
9787
9788 return true;
9789 }
9790
9791 if (speed_p)
9792 *cost += extra_cost->fp[mode != SFmode].addsub;
9793 return false;
9794 }
9795
9796 if (mode == SImode)
9797 {
9798 rtx shift_by_reg = NULL;
9799 rtx shift_op;
9800 rtx non_shift_op;
9801
9802 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9803 if (shift_op == NULL)
9804 {
9805 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9806 non_shift_op = XEXP (x, 0);
9807 }
9808 else
9809 non_shift_op = XEXP (x, 1);
9810
9811 if (shift_op != NULL)
9812 {
9813 if (shift_by_reg != NULL)
9814 {
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith_shift_reg;
9817 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9818 }
9819 else if (speed_p)
9820 *cost += extra_cost->alu.arith_shift;
9821
9822 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9823 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9824 return true;
9825 }
9826
9827 if (arm_arch_thumb2
9828 && GET_CODE (XEXP (x, 1)) == MULT)
9829 {
9830 /* MLS. */
9831 if (speed_p)
9832 *cost += extra_cost->mult[0].add;
9833 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9834 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9835 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9836 return true;
9837 }
9838
9839 if (CONST_INT_P (XEXP (x, 0)))
9840 {
9841 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9842 INTVAL (XEXP (x, 0)), NULL_RTX,
9843 NULL_RTX, 1, 0);
9844 *cost = COSTS_N_INSNS (insns);
9845 if (speed_p)
9846 *cost += insns * extra_cost->alu.arith;
9847 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9848 return true;
9849 }
9850 else if (speed_p)
9851 *cost += extra_cost->alu.arith;
9852
9853 return false;
9854 }
9855
9856 if (GET_MODE_CLASS (mode) == MODE_INT
9857 && GET_MODE_SIZE (mode) < 4)
9858 {
9859 rtx shift_op, shift_reg;
9860 shift_reg = NULL;
9861
9862 /* We check both sides of the MINUS for shifter operands since,
9863 unlike PLUS, it's not commutative. */
9864
9865 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9866 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9867
9868 /* Slightly disparage, as we might need to widen the result. */
9869 *cost += 1;
9870 if (speed_p)
9871 *cost += extra_cost->alu.arith;
9872
9873 if (CONST_INT_P (XEXP (x, 0)))
9874 {
9875 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9876 return true;
9877 }
9878
9879 return false;
9880 }
9881
9882 if (mode == DImode)
9883 {
9884 *cost += COSTS_N_INSNS (1);
9885
9886 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9887 {
9888 rtx op1 = XEXP (x, 1);
9889
9890 if (speed_p)
9891 *cost += 2 * extra_cost->alu.arith;
9892
9893 if (GET_CODE (op1) == ZERO_EXTEND)
9894 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9895 0, speed_p);
9896 else
9897 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9898 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9899 0, speed_p);
9900 return true;
9901 }
9902 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9903 {
9904 if (speed_p)
9905 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9906 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9907 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9909 return true;
9910 }
9911 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9912 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9913 {
9914 if (speed_p)
9915 *cost += (extra_cost->alu.arith
9916 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9917 ? extra_cost->alu.arith
9918 : extra_cost->alu.arith_shift));
9919 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9920 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9921 GET_CODE (XEXP (x, 1)), 0, speed_p));
9922 return true;
9923 }
9924
9925 if (speed_p)
9926 *cost += 2 * extra_cost->alu.arith;
9927 return false;
9928 }
9929
9930 /* Vector mode? */
9931
9932 *cost = LIBCALL_COST (2);
9933 return false;
9934
9935 case PLUS:
9936 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9937 && (mode == SFmode || !TARGET_VFP_SINGLE))
9938 {
9939 if (GET_CODE (XEXP (x, 0)) == MULT)
9940 {
9941 rtx mul_op0, mul_op1, add_op;
9942
9943 if (speed_p)
9944 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9945
9946 mul_op0 = XEXP (XEXP (x, 0), 0);
9947 mul_op1 = XEXP (XEXP (x, 0), 1);
9948 add_op = XEXP (x, 1);
9949
9950 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9951 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9952 + rtx_cost (add_op, mode, code, 0, speed_p));
9953
9954 return true;
9955 }
9956
9957 if (speed_p)
9958 *cost += extra_cost->fp[mode != SFmode].addsub;
9959 return false;
9960 }
9961 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9962 {
9963 *cost = LIBCALL_COST (2);
9964 return false;
9965 }
9966
9967 /* Narrow modes can be synthesized in SImode, but the range
9968 of useful sub-operations is limited. Check for shift operations
9969 on one of the operands. Only left shifts can be used in the
9970 narrow modes. */
9971 if (GET_MODE_CLASS (mode) == MODE_INT
9972 && GET_MODE_SIZE (mode) < 4)
9973 {
9974 rtx shift_op, shift_reg;
9975 shift_reg = NULL;
9976
9977 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9978
9979 if (CONST_INT_P (XEXP (x, 1)))
9980 {
9981 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9982 INTVAL (XEXP (x, 1)), NULL_RTX,
9983 NULL_RTX, 1, 0);
9984 *cost = COSTS_N_INSNS (insns);
9985 if (speed_p)
9986 *cost += insns * extra_cost->alu.arith;
9987 /* Slightly penalize a narrow operation as the result may
9988 need widening. */
9989 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9990 return true;
9991 }
9992
9993 /* Slightly penalize a narrow operation as the result may
9994 need widening. */
9995 *cost += 1;
9996 if (speed_p)
9997 *cost += extra_cost->alu.arith;
9998
9999 return false;
10000 }
10001
10002 if (mode == SImode)
10003 {
10004 rtx shift_op, shift_reg;
10005
10006 if (TARGET_INT_SIMD
10007 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10008 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10009 {
10010 /* UXTA[BH] or SXTA[BH]. */
10011 if (speed_p)
10012 *cost += extra_cost->alu.extend_arith;
10013 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10014 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10016 return true;
10017 }
10018
10019 shift_reg = NULL;
10020 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10021 if (shift_op != NULL)
10022 {
10023 if (shift_reg)
10024 {
10025 if (speed_p)
10026 *cost += extra_cost->alu.arith_shift_reg;
10027 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10028 }
10029 else if (speed_p)
10030 *cost += extra_cost->alu.arith_shift;
10031
10032 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10034 return true;
10035 }
10036 if (GET_CODE (XEXP (x, 0)) == MULT)
10037 {
10038 rtx mul_op = XEXP (x, 0);
10039
10040 if (TARGET_DSP_MULTIPLY
10041 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10042 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10045 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10046 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10047 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10048 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10049 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10050 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10051 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10052 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10053 == 16))))))
10054 {
10055 /* SMLA[BT][BT]. */
10056 if (speed_p)
10057 *cost += extra_cost->mult[0].extend_add;
10058 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10059 SIGN_EXTEND, 0, speed_p)
10060 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10061 SIGN_EXTEND, 0, speed_p)
10062 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10063 return true;
10064 }
10065
10066 if (speed_p)
10067 *cost += extra_cost->mult[0].add;
10068 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10069 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10070 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10071 return true;
10072 }
10073 if (CONST_INT_P (XEXP (x, 1)))
10074 {
10075 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10076 INTVAL (XEXP (x, 1)), NULL_RTX,
10077 NULL_RTX, 1, 0);
10078 *cost = COSTS_N_INSNS (insns);
10079 if (speed_p)
10080 *cost += insns * extra_cost->alu.arith;
10081 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10082 return true;
10083 }
10084 else if (speed_p)
10085 *cost += extra_cost->alu.arith;
10086
10087 return false;
10088 }
10089
10090 if (mode == DImode)
10091 {
10092 if (GET_CODE (XEXP (x, 0)) == MULT
10093 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10094 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10095 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10096 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10097 {
10098 if (speed_p)
10099 *cost += extra_cost->mult[1].extend_add;
10100 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10101 ZERO_EXTEND, 0, speed_p)
10102 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10103 ZERO_EXTEND, 0, speed_p)
10104 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10105 return true;
10106 }
10107
10108 *cost += COSTS_N_INSNS (1);
10109
10110 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10111 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10112 {
10113 if (speed_p)
10114 *cost += (extra_cost->alu.arith
10115 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10116 ? extra_cost->alu.arith
10117 : extra_cost->alu.arith_shift));
10118
10119 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10120 0, speed_p)
10121 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10122 return true;
10123 }
10124
10125 if (speed_p)
10126 *cost += 2 * extra_cost->alu.arith;
10127 return false;
10128 }
10129
10130 /* Vector mode? */
10131 *cost = LIBCALL_COST (2);
10132 return false;
10133 case IOR:
10134 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10135 {
10136 if (speed_p)
10137 *cost += extra_cost->alu.rev;
10138
10139 return true;
10140 }
10141 /* Fall through. */
10142 case AND: case XOR:
10143 if (mode == SImode)
10144 {
10145 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10146 rtx op0 = XEXP (x, 0);
10147 rtx shift_op, shift_reg;
10148
10149 if (subcode == NOT
10150 && (code == AND
10151 || (code == IOR && TARGET_THUMB2)))
10152 op0 = XEXP (op0, 0);
10153
10154 shift_reg = NULL;
10155 shift_op = shifter_op_p (op0, &shift_reg);
10156 if (shift_op != NULL)
10157 {
10158 if (shift_reg)
10159 {
10160 if (speed_p)
10161 *cost += extra_cost->alu.log_shift_reg;
10162 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10163 }
10164 else if (speed_p)
10165 *cost += extra_cost->alu.log_shift;
10166
10167 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10169 return true;
10170 }
10171
10172 if (CONST_INT_P (XEXP (x, 1)))
10173 {
10174 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10175 INTVAL (XEXP (x, 1)), NULL_RTX,
10176 NULL_RTX, 1, 0);
10177
10178 *cost = COSTS_N_INSNS (insns);
10179 if (speed_p)
10180 *cost += insns * extra_cost->alu.logical;
10181 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10182 return true;
10183 }
10184
10185 if (speed_p)
10186 *cost += extra_cost->alu.logical;
10187 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10188 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10189 return true;
10190 }
10191
10192 if (mode == DImode)
10193 {
10194 rtx op0 = XEXP (x, 0);
10195 enum rtx_code subcode = GET_CODE (op0);
10196
10197 *cost += COSTS_N_INSNS (1);
10198
10199 if (subcode == NOT
10200 && (code == AND
10201 || (code == IOR && TARGET_THUMB2)))
10202 op0 = XEXP (op0, 0);
10203
10204 if (GET_CODE (op0) == ZERO_EXTEND)
10205 {
10206 if (speed_p)
10207 *cost += 2 * extra_cost->alu.logical;
10208
10209 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10210 0, speed_p)
10211 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10212 return true;
10213 }
10214 else if (GET_CODE (op0) == SIGN_EXTEND)
10215 {
10216 if (speed_p)
10217 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10218
10219 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10220 0, speed_p)
10221 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10222 return true;
10223 }
10224
10225 if (speed_p)
10226 *cost += 2 * extra_cost->alu.logical;
10227
10228 return true;
10229 }
10230 /* Vector mode? */
10231
10232 *cost = LIBCALL_COST (2);
10233 return false;
10234
10235 case MULT:
10236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10237 && (mode == SFmode || !TARGET_VFP_SINGLE))
10238 {
10239 rtx op0 = XEXP (x, 0);
10240
10241 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10242 op0 = XEXP (op0, 0);
10243
10244 if (speed_p)
10245 *cost += extra_cost->fp[mode != SFmode].mult;
10246
10247 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10248 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10249 return true;
10250 }
10251 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10252 {
10253 *cost = LIBCALL_COST (2);
10254 return false;
10255 }
10256
10257 if (mode == SImode)
10258 {
10259 if (TARGET_DSP_MULTIPLY
10260 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10261 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10262 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10263 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10264 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10265 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10266 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10267 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10268 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10269 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10270 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10271 && (INTVAL (XEXP (XEXP (x, 1), 1))
10272 == 16))))))
10273 {
10274 /* SMUL[TB][TB]. */
10275 if (speed_p)
10276 *cost += extra_cost->mult[0].extend;
10277 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10278 SIGN_EXTEND, 0, speed_p);
10279 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10280 SIGN_EXTEND, 1, speed_p);
10281 return true;
10282 }
10283 if (speed_p)
10284 *cost += extra_cost->mult[0].simple;
10285 return false;
10286 }
10287
10288 if (mode == DImode)
10289 {
10290 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10291 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10292 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10293 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10294 {
10295 if (speed_p)
10296 *cost += extra_cost->mult[1].extend;
10297 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10298 ZERO_EXTEND, 0, speed_p)
10299 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10300 ZERO_EXTEND, 0, speed_p));
10301 return true;
10302 }
10303
10304 *cost = LIBCALL_COST (2);
10305 return false;
10306 }
10307
10308 /* Vector mode? */
10309 *cost = LIBCALL_COST (2);
10310 return false;
10311
10312 case NEG:
10313 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10314 && (mode == SFmode || !TARGET_VFP_SINGLE))
10315 {
10316 if (GET_CODE (XEXP (x, 0)) == MULT)
10317 {
10318 /* VNMUL. */
10319 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10320 return true;
10321 }
10322
10323 if (speed_p)
10324 *cost += extra_cost->fp[mode != SFmode].neg;
10325
10326 return false;
10327 }
10328 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10329 {
10330 *cost = LIBCALL_COST (1);
10331 return false;
10332 }
10333
10334 if (mode == SImode)
10335 {
10336 if (GET_CODE (XEXP (x, 0)) == ABS)
10337 {
10338 *cost += COSTS_N_INSNS (1);
10339 /* Assume the non-flag-changing variant. */
10340 if (speed_p)
10341 *cost += (extra_cost->alu.log_shift
10342 + extra_cost->alu.arith_shift);
10343 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10344 return true;
10345 }
10346
10347 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10348 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10349 {
10350 *cost += COSTS_N_INSNS (1);
10351 /* No extra cost for MOV imm and MVN imm. */
10352 /* If the comparison op is using the flags, there's no further
10353 cost, otherwise we need to add the cost of the comparison. */
10354 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10355 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10356 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10357 {
10358 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10359 *cost += (COSTS_N_INSNS (1)
10360 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10361 0, speed_p)
10362 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10363 1, speed_p));
10364 if (speed_p)
10365 *cost += extra_cost->alu.arith;
10366 }
10367 return true;
10368 }
10369
10370 if (speed_p)
10371 *cost += extra_cost->alu.arith;
10372 return false;
10373 }
10374
10375 if (GET_MODE_CLASS (mode) == MODE_INT
10376 && GET_MODE_SIZE (mode) < 4)
10377 {
10378 /* Slightly disparage, as we might need an extend operation. */
10379 *cost += 1;
10380 if (speed_p)
10381 *cost += extra_cost->alu.arith;
10382 return false;
10383 }
10384
10385 if (mode == DImode)
10386 {
10387 *cost += COSTS_N_INSNS (1);
10388 if (speed_p)
10389 *cost += 2 * extra_cost->alu.arith;
10390 return false;
10391 }
10392
10393 /* Vector mode? */
10394 *cost = LIBCALL_COST (1);
10395 return false;
10396
10397 case NOT:
10398 if (mode == SImode)
10399 {
10400 rtx shift_op;
10401 rtx shift_reg = NULL;
10402
10403 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10404
10405 if (shift_op)
10406 {
10407 if (shift_reg != NULL)
10408 {
10409 if (speed_p)
10410 *cost += extra_cost->alu.log_shift_reg;
10411 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10412 }
10413 else if (speed_p)
10414 *cost += extra_cost->alu.log_shift;
10415 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10416 return true;
10417 }
10418
10419 if (speed_p)
10420 *cost += extra_cost->alu.logical;
10421 return false;
10422 }
10423 if (mode == DImode)
10424 {
10425 *cost += COSTS_N_INSNS (1);
10426 return false;
10427 }
10428
10429 /* Vector mode? */
10430
10431 *cost += LIBCALL_COST (1);
10432 return false;
10433
10434 case IF_THEN_ELSE:
10435 {
10436 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10437 {
10438 *cost += COSTS_N_INSNS (3);
10439 return true;
10440 }
10441 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10442 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10443
10444 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10445 /* Assume that if one arm of the if_then_else is a register,
10446 it will be tied with the result and the conditional insn
10447 will be eliminated. */
10448 if (REG_P (XEXP (x, 1)))
10449 *cost += op2cost;
10450 else if (REG_P (XEXP (x, 2)))
10451 *cost += op1cost;
10452 else
10453 {
10454 if (speed_p)
10455 {
10456 if (extra_cost->alu.non_exec_costs_exec)
10457 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10458 else
10459 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10460 }
10461 else
10462 *cost += op1cost + op2cost;
10463 }
10464 }
10465 return true;
10466
10467 case COMPARE:
10468 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10469 *cost = 0;
10470 else
10471 {
10472 machine_mode op0mode;
10473 /* We'll mostly assume that the cost of a compare is the cost of the
10474 LHS. However, there are some notable exceptions. */
10475
10476 /* Floating point compares are never done as side-effects. */
10477 op0mode = GET_MODE (XEXP (x, 0));
10478 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10479 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10480 {
10481 if (speed_p)
10482 *cost += extra_cost->fp[op0mode != SFmode].compare;
10483
10484 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10485 {
10486 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10487 return true;
10488 }
10489
10490 return false;
10491 }
10492 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10493 {
10494 *cost = LIBCALL_COST (2);
10495 return false;
10496 }
10497
10498 /* DImode compares normally take two insns. */
10499 if (op0mode == DImode)
10500 {
10501 *cost += COSTS_N_INSNS (1);
10502 if (speed_p)
10503 *cost += 2 * extra_cost->alu.arith;
10504 return false;
10505 }
10506
10507 if (op0mode == SImode)
10508 {
10509 rtx shift_op;
10510 rtx shift_reg;
10511
10512 if (XEXP (x, 1) == const0_rtx
10513 && !(REG_P (XEXP (x, 0))
10514 || (GET_CODE (XEXP (x, 0)) == SUBREG
10515 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10516 {
10517 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10518
10519 /* Multiply operations that set the flags are often
10520 significantly more expensive. */
10521 if (speed_p
10522 && GET_CODE (XEXP (x, 0)) == MULT
10523 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10524 *cost += extra_cost->mult[0].flag_setting;
10525
10526 if (speed_p
10527 && GET_CODE (XEXP (x, 0)) == PLUS
10528 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10529 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10530 0), 1), mode))
10531 *cost += extra_cost->mult[0].flag_setting;
10532 return true;
10533 }
10534
10535 shift_reg = NULL;
10536 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10537 if (shift_op != NULL)
10538 {
10539 if (shift_reg != NULL)
10540 {
10541 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10542 1, speed_p);
10543 if (speed_p)
10544 *cost += extra_cost->alu.arith_shift_reg;
10545 }
10546 else if (speed_p)
10547 *cost += extra_cost->alu.arith_shift;
10548 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10549 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10550 return true;
10551 }
10552
10553 if (speed_p)
10554 *cost += extra_cost->alu.arith;
10555 if (CONST_INT_P (XEXP (x, 1))
10556 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10557 {
10558 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10559 return true;
10560 }
10561 return false;
10562 }
10563
10564 /* Vector mode? */
10565
10566 *cost = LIBCALL_COST (2);
10567 return false;
10568 }
10569 return true;
10570
10571 case EQ:
10572 case NE:
10573 case LT:
10574 case LE:
10575 case GT:
10576 case GE:
10577 case LTU:
10578 case LEU:
10579 case GEU:
10580 case GTU:
10581 case ORDERED:
10582 case UNORDERED:
10583 case UNEQ:
10584 case UNLE:
10585 case UNLT:
10586 case UNGE:
10587 case UNGT:
10588 case LTGT:
10589 if (outer_code == SET)
10590 {
10591 /* Is it a store-flag operation? */
10592 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10593 && XEXP (x, 1) == const0_rtx)
10594 {
10595 /* Thumb also needs an IT insn. */
10596 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10597 return true;
10598 }
10599 if (XEXP (x, 1) == const0_rtx)
10600 {
10601 switch (code)
10602 {
10603 case LT:
10604 /* LSR Rd, Rn, #31. */
10605 if (speed_p)
10606 *cost += extra_cost->alu.shift;
10607 break;
10608
10609 case EQ:
10610 /* RSBS T1, Rn, #0
10611 ADC Rd, Rn, T1. */
10612
10613 case NE:
10614 /* SUBS T1, Rn, #1
10615 SBC Rd, Rn, T1. */
10616 *cost += COSTS_N_INSNS (1);
10617 break;
10618
10619 case LE:
10620 /* RSBS T1, Rn, Rn, LSR #31
10621 ADC Rd, Rn, T1. */
10622 *cost += COSTS_N_INSNS (1);
10623 if (speed_p)
10624 *cost += extra_cost->alu.arith_shift;
10625 break;
10626
10627 case GT:
10628 /* RSB Rd, Rn, Rn, ASR #1
10629 LSR Rd, Rd, #31. */
10630 *cost += COSTS_N_INSNS (1);
10631 if (speed_p)
10632 *cost += (extra_cost->alu.arith_shift
10633 + extra_cost->alu.shift);
10634 break;
10635
10636 case GE:
10637 /* ASR Rd, Rn, #31
10638 ADD Rd, Rn, #1. */
10639 *cost += COSTS_N_INSNS (1);
10640 if (speed_p)
10641 *cost += extra_cost->alu.shift;
10642 break;
10643
10644 default:
10645 /* Remaining cases are either meaningless or would take
10646 three insns anyway. */
10647 *cost = COSTS_N_INSNS (3);
10648 break;
10649 }
10650 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10651 return true;
10652 }
10653 else
10654 {
10655 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10656 if (CONST_INT_P (XEXP (x, 1))
10657 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10658 {
10659 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10660 return true;
10661 }
10662
10663 return false;
10664 }
10665 }
10666 /* Not directly inside a set. If it involves the condition code
10667 register it must be the condition for a branch, cond_exec or
10668 I_T_E operation. Since the comparison is performed elsewhere
10669 this is just the control part which has no additional
10670 cost. */
10671 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10672 && XEXP (x, 1) == const0_rtx)
10673 {
10674 *cost = 0;
10675 return true;
10676 }
10677 return false;
10678
10679 case ABS:
10680 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10681 && (mode == SFmode || !TARGET_VFP_SINGLE))
10682 {
10683 if (speed_p)
10684 *cost += extra_cost->fp[mode != SFmode].neg;
10685
10686 return false;
10687 }
10688 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10689 {
10690 *cost = LIBCALL_COST (1);
10691 return false;
10692 }
10693
10694 if (mode == SImode)
10695 {
10696 if (speed_p)
10697 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10698 return false;
10699 }
10700 /* Vector mode? */
10701 *cost = LIBCALL_COST (1);
10702 return false;
10703
10704 case SIGN_EXTEND:
10705 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10706 && MEM_P (XEXP (x, 0)))
10707 {
10708 if (mode == DImode)
10709 *cost += COSTS_N_INSNS (1);
10710
10711 if (!speed_p)
10712 return true;
10713
10714 if (GET_MODE (XEXP (x, 0)) == SImode)
10715 *cost += extra_cost->ldst.load;
10716 else
10717 *cost += extra_cost->ldst.load_sign_extend;
10718
10719 if (mode == DImode)
10720 *cost += extra_cost->alu.shift;
10721
10722 return true;
10723 }
10724
10725 /* Widening from less than 32-bits requires an extend operation. */
10726 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10727 {
10728 /* We have SXTB/SXTH. */
10729 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10730 if (speed_p)
10731 *cost += extra_cost->alu.extend;
10732 }
10733 else if (GET_MODE (XEXP (x, 0)) != SImode)
10734 {
10735 /* Needs two shifts. */
10736 *cost += COSTS_N_INSNS (1);
10737 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10738 if (speed_p)
10739 *cost += 2 * extra_cost->alu.shift;
10740 }
10741
10742 /* Widening beyond 32-bits requires one more insn. */
10743 if (mode == DImode)
10744 {
10745 *cost += COSTS_N_INSNS (1);
10746 if (speed_p)
10747 *cost += extra_cost->alu.shift;
10748 }
10749
10750 return true;
10751
10752 case ZERO_EXTEND:
10753 if ((arm_arch4
10754 || GET_MODE (XEXP (x, 0)) == SImode
10755 || GET_MODE (XEXP (x, 0)) == QImode)
10756 && MEM_P (XEXP (x, 0)))
10757 {
10758 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10759
10760 if (mode == DImode)
10761 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10762
10763 return true;
10764 }
10765
10766 /* Widening from less than 32-bits requires an extend operation. */
10767 if (GET_MODE (XEXP (x, 0)) == QImode)
10768 {
10769 /* UXTB can be a shorter instruction in Thumb2, but it might
10770 be slower than the AND Rd, Rn, #255 alternative. When
10771 optimizing for speed it should never be slower to use
10772 AND, and we don't really model 16-bit vs 32-bit insns
10773 here. */
10774 if (speed_p)
10775 *cost += extra_cost->alu.logical;
10776 }
10777 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10778 {
10779 /* We have UXTB/UXTH. */
10780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10781 if (speed_p)
10782 *cost += extra_cost->alu.extend;
10783 }
10784 else if (GET_MODE (XEXP (x, 0)) != SImode)
10785 {
10786 /* Needs two shifts. It's marginally preferable to use
10787 shifts rather than two BIC instructions as the second
10788 shift may merge with a subsequent insn as a shifter
10789 op. */
10790 *cost = COSTS_N_INSNS (2);
10791 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10792 if (speed_p)
10793 *cost += 2 * extra_cost->alu.shift;
10794 }
10795
10796 /* Widening beyond 32-bits requires one more insn. */
10797 if (mode == DImode)
10798 {
10799 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10800 }
10801
10802 return true;
10803
10804 case CONST_INT:
10805 *cost = 0;
10806 /* CONST_INT has no mode, so we cannot tell for sure how many
10807 insns are really going to be needed. The best we can do is
10808 look at the value passed. If it fits in SImode, then assume
10809 that's the mode it will be used for. Otherwise assume it
10810 will be used in DImode. */
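/* For illustration: a value such as 0x1234 survives trunc_int_for_mode
   (..., SImode) unchanged and is costed as a single 32-bit constant below,
   whereas a value needing more than 32 bits, e.g. (HOST_WIDE_INT) 1 << 40,
   is costed as a pair of 32-bit constants via the DImode path. */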
10811 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10812 mode = SImode;
10813 else
10814 mode = DImode;
10815
10816 /* Avoid blowing up in arm_gen_constant (). */
10817 if (!(outer_code == PLUS
10818 || outer_code == AND
10819 || outer_code == IOR
10820 || outer_code == XOR
10821 || outer_code == MINUS))
10822 outer_code = SET;
10823
10824 const_int_cost:
10825 if (mode == SImode)
10826 {
10827 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10828 INTVAL (x), NULL, NULL,
10829 0, 0));
10830 /* Extra costs? */
10831 }
10832 else
10833 {
10834 *cost += COSTS_N_INSNS (arm_gen_constant
10835 (outer_code, SImode, NULL,
10836 trunc_int_for_mode (INTVAL (x), SImode),
10837 NULL, NULL, 0, 0)
10838 + arm_gen_constant (outer_code, SImode, NULL,
10839 INTVAL (x) >> 32, NULL,
10840 NULL, 0, 0));
10841 /* Extra costs? */
10842 }
10843
10844 return true;
10845
10846 case CONST:
10847 case LABEL_REF:
10848 case SYMBOL_REF:
10849 if (speed_p)
10850 {
10851 if (arm_arch_thumb2 && !flag_pic)
10852 *cost += COSTS_N_INSNS (1);
10853 else
10854 *cost += extra_cost->ldst.load;
10855 }
10856 else
10857 *cost += COSTS_N_INSNS (1);
10858
10859 if (flag_pic)
10860 {
10861 *cost += COSTS_N_INSNS (1);
10862 if (speed_p)
10863 *cost += extra_cost->alu.arith;
10864 }
10865
10866 return true;
10867
10868 case CONST_FIXED:
10869 *cost = COSTS_N_INSNS (4);
10870 /* Fixme. */
10871 return true;
10872
10873 case CONST_DOUBLE:
10874 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10875 && (mode == SFmode || !TARGET_VFP_SINGLE))
10876 {
10877 if (vfp3_const_double_rtx (x))
10878 {
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode == DFmode].fpconst;
10881 return true;
10882 }
10883
10884 if (speed_p)
10885 {
10886 if (mode == DFmode)
10887 *cost += extra_cost->ldst.loadd;
10888 else
10889 *cost += extra_cost->ldst.loadf;
10890 }
10891 else
10892 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10893
10894 return true;
10895 }
10896 *cost = COSTS_N_INSNS (4);
10897 return true;
10898
10899 case CONST_VECTOR:
10900 /* Fixme. */
10901 if (TARGET_NEON
10902 && TARGET_HARD_FLOAT
10903 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10904 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10905 *cost = COSTS_N_INSNS (1);
10906 else
10907 *cost = COSTS_N_INSNS (4);
10908 return true;
10909
10910 case HIGH:
10911 case LO_SUM:
10912 /* When optimizing for size, we prefer constant pool entries to
10913 MOVW/MOVT pairs, so bump the cost of these slightly. */
10914 if (!speed_p)
10915 *cost += 1;
10916 return true;
10917
10918 case CLZ:
10919 if (speed_p)
10920 *cost += extra_cost->alu.clz;
10921 return false;
10922
10923 case SMIN:
10924 if (XEXP (x, 1) == const0_rtx)
10925 {
10926 if (speed_p)
10927 *cost += extra_cost->alu.log_shift;
10928 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10929 return true;
10930 }
10931 /* Fall through. */
10932 case SMAX:
10933 case UMIN:
10934 case UMAX:
10935 *cost += COSTS_N_INSNS (1);
10936 return false;
10937
10938 case TRUNCATE:
10939 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10940 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10941 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10943 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10944 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10945 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10946 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10947 == ZERO_EXTEND))))
10948 {
10949 if (speed_p)
10950 *cost += extra_cost->mult[1].extend;
10951 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10952 ZERO_EXTEND, 0, speed_p)
10953 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10954 ZERO_EXTEND, 0, speed_p));
10955 return true;
10956 }
10957 *cost = LIBCALL_COST (1);
10958 return false;
10959
10960 case UNSPEC_VOLATILE:
10961 case UNSPEC:
10962 return arm_unspec_cost (x, outer_code, speed_p, cost);
10963
10964 case PC:
10965 /* Reading the PC is like reading any other register. Writing it
10966 is more expensive, but we take that into account elsewhere. */
10967 *cost = 0;
10968 return true;
10969
10970 case ZERO_EXTRACT:
10971 /* TODO: Simple zero_extract of bottom bits using AND. */
10972 /* Fall through. */
10973 case SIGN_EXTRACT:
10974 if (arm_arch6
10975 && mode == SImode
10976 && CONST_INT_P (XEXP (x, 1))
10977 && CONST_INT_P (XEXP (x, 2)))
10978 {
10979 if (speed_p)
10980 *cost += extra_cost->alu.bfx;
10981 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10982 return true;
10983 }
10984 /* Without UBFX/SBFX, need to resort to shift operations. */
10985 *cost += COSTS_N_INSNS (1);
10986 if (speed_p)
10987 *cost += 2 * extra_cost->alu.shift;
10988 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10989 return true;
10990
10991 case FLOAT_EXTEND:
10992 if (TARGET_HARD_FLOAT)
10993 {
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].widen;
10996 if (!TARGET_VFP5
10997 && GET_MODE (XEXP (x, 0)) == HFmode)
10998 {
10999 /* Pre v8, widening HF->DF is a two-step process, first
11000 widening to SFmode. */
11001 *cost += COSTS_N_INSNS (1);
11002 if (speed_p)
11003 *cost += extra_cost->fp[0].widen;
11004 }
11005 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11006 return true;
11007 }
11008
11009 *cost = LIBCALL_COST (1);
11010 return false;
11011
11012 case FLOAT_TRUNCATE:
11013 if (TARGET_HARD_FLOAT)
11014 {
11015 if (speed_p)
11016 *cost += extra_cost->fp[mode == DFmode].narrow;
11017 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11018 return true;
11019 /* Vector modes? */
11020 }
11021 *cost = LIBCALL_COST (1);
11022 return false;
11023
11024 case FMA:
11025 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11026 {
11027 rtx op0 = XEXP (x, 0);
11028 rtx op1 = XEXP (x, 1);
11029 rtx op2 = XEXP (x, 2);
11030
11031
11032 /* vfms or vfnma. */
11033 if (GET_CODE (op0) == NEG)
11034 op0 = XEXP (op0, 0);
11035
11036 /* vfnms or vfnma. */
11037 if (GET_CODE (op2) == NEG)
11038 op2 = XEXP (op2, 0);
11039
11040 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11041 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11042 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11043
11044 if (speed_p)
11045 *cost += extra_cost->fp[mode == DFmode].fma;
11046
11047 return true;
11048 }
11049
11050 *cost = LIBCALL_COST (3);
11051 return false;
11052
11053 case FIX:
11054 case UNSIGNED_FIX:
11055 if (TARGET_HARD_FLOAT)
11056 {
11057 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11058 a vcvt fixed-point conversion. */
11059 if (code == FIX && mode == SImode
11060 && GET_CODE (XEXP (x, 0)) == FIX
11061 && GET_MODE (XEXP (x, 0)) == SFmode
11062 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11063 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11064 > 0)
11065 {
11066 if (speed_p)
11067 *cost += extra_cost->fp[0].toint;
11068
11069 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11070 code, 0, speed_p);
11071 return true;
11072 }
11073
11074 if (GET_MODE_CLASS (mode) == MODE_INT)
11075 {
11076 mode = GET_MODE (XEXP (x, 0));
11077 if (speed_p)
11078 *cost += extra_cost->fp[mode == DFmode].toint;
11079 /* Strip off the 'cost' of rounding towards zero. */
11080 if (GET_CODE (XEXP (x, 0)) == FIX)
11081 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11082 0, speed_p);
11083 else
11084 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11085 /* ??? Increase the cost to deal with transferring from
11086 FP -> CORE registers? */
11087 return true;
11088 }
11089 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11090 && TARGET_VFP5)
11091 {
11092 if (speed_p)
11093 *cost += extra_cost->fp[mode == DFmode].roundint;
11094 return false;
11095 }
11096 /* Vector costs? */
11097 }
11098 *cost = LIBCALL_COST (1);
11099 return false;
11100
11101 case FLOAT:
11102 case UNSIGNED_FLOAT:
11103 if (TARGET_HARD_FLOAT)
11104 {
11105 /* ??? Increase the cost to deal with transferring from CORE
11106 -> FP registers? */
11107 if (speed_p)
11108 *cost += extra_cost->fp[mode == DFmode].fromint;
11109 return false;
11110 }
11111 *cost = LIBCALL_COST (1);
11112 return false;
11113
11114 case CALL:
11115 return true;
11116
11117 case ASM_OPERANDS:
11118 {
11119 /* Just a guess. Guess number of instructions in the asm
11120 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11121 though (see PR60663). */
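/* For illustration: an asm template that expands to two instructions and
   has three input operands is costed as COSTS_N_INSNS (2 + 3) below. */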
11122 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11123 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11124
11125 *cost = COSTS_N_INSNS (asm_length + num_operands);
11126 return true;
11127 }
11128 default:
11129 if (mode != VOIDmode)
11130 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11131 else
11132 *cost = COSTS_N_INSNS (4); /* Who knows? */
11133 return false;
11134 }
11135 }
11136
11137 #undef HANDLE_NARROW_SHIFT_ARITH
11138
11139 /* RTX costs entry point. */
11140
11141 static bool
11142 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11143 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11144 {
11145 bool result;
11146 int code = GET_CODE (x);
11147 gcc_assert (current_tune->insn_extra_cost);
11148
11149 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11150 (enum rtx_code) outer_code,
11151 current_tune->insn_extra_cost,
11152 total, speed);
11153
11154 if (dump_file && arm_verbose_cost)
11155 {
11156 print_rtl_single (dump_file, x);
11157 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11158 *total, result ? "final" : "partial");
11159 }
11160 return result;
11161 }
11162
11163 /* All address computations that can be done are free, but rtx cost returns
11164 the same for practically all of them. So we weight the different types
11165 of address here in the order (most pref first):
11166 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
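/* For illustration, the weights returned below are:
     PRE/POST_INC/DEC                 -> 0
     reg + constant offset            -> 2
     reg + shifted (arithmetic) term  -> 3
     other reg + reg sums             -> 4
     plain register                   -> 6
     MEM / LABEL_REF / SYMBOL_REF     -> 10  */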
11167 static inline int
11168 arm_arm_address_cost (rtx x)
11169 {
11170 enum rtx_code c = GET_CODE (x);
11171
11172 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11173 return 0;
11174 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11175 return 10;
11176
11177 if (c == PLUS)
11178 {
11179 if (CONST_INT_P (XEXP (x, 1)))
11180 return 2;
11181
11182 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11183 return 3;
11184
11185 return 4;
11186 }
11187
11188 return 6;
11189 }
11190
11191 static inline int
11192 arm_thumb_address_cost (rtx x)
11193 {
11194 enum rtx_code c = GET_CODE (x);
11195
11196 if (c == REG)
11197 return 1;
11198 if (c == PLUS
11199 && REG_P (XEXP (x, 0))
11200 && CONST_INT_P (XEXP (x, 1)))
11201 return 1;
11202
11203 return 2;
11204 }
11205
11206 static int
11207 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11208 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11209 {
11210 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11211 }
11212
11213 /* Adjust cost hook for XScale. */
11214 static bool
11215 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11216 int * cost)
11217 {
11218 /* Some true dependencies can have a higher cost depending
11219 on precisely how certain input operands are used. */
11220 if (dep_type == 0
11221 && recog_memoized (insn) >= 0
11222 && recog_memoized (dep) >= 0)
11223 {
11224 int shift_opnum = get_attr_shift (insn);
11225 enum attr_type attr_type = get_attr_type (dep);
11226
11227 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11228 operand for INSN. If we have a shifted input operand and the
11229 instruction we depend on is another ALU instruction, then we may
11230 have to account for an additional stall. */
11231 if (shift_opnum != 0
11232 && (attr_type == TYPE_ALU_SHIFT_IMM
11233 || attr_type == TYPE_ALUS_SHIFT_IMM
11234 || attr_type == TYPE_LOGIC_SHIFT_IMM
11235 || attr_type == TYPE_LOGICS_SHIFT_IMM
11236 || attr_type == TYPE_ALU_SHIFT_REG
11237 || attr_type == TYPE_ALUS_SHIFT_REG
11238 || attr_type == TYPE_LOGIC_SHIFT_REG
11239 || attr_type == TYPE_LOGICS_SHIFT_REG
11240 || attr_type == TYPE_MOV_SHIFT
11241 || attr_type == TYPE_MVN_SHIFT
11242 || attr_type == TYPE_MOV_SHIFT_REG
11243 || attr_type == TYPE_MVN_SHIFT_REG))
11244 {
11245 rtx shifted_operand;
11246 int opno;
11247
11248 /* Get the shifted operand. */
11249 extract_insn (insn);
11250 shifted_operand = recog_data.operand[shift_opnum];
11251
11252 /* Iterate over all the operands in DEP. If we write an operand
11253 that overlaps with SHIFTED_OPERAND, then we have to increase the
11254 cost of this dependency. */
11255 extract_insn (dep);
11256 preprocess_constraints (dep);
11257 for (opno = 0; opno < recog_data.n_operands; opno++)
11258 {
11259 /* We can ignore strict inputs. */
11260 if (recog_data.operand_type[opno] == OP_IN)
11261 continue;
11262
11263 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11264 shifted_operand))
11265 {
11266 *cost = 2;
11267 return false;
11268 }
11269 }
11270 }
11271 }
11272 return true;
11273 }
11274
11275 /* Adjust cost hook for Cortex A9. */
11276 static bool
11277 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11278 int * cost)
11279 {
11280 switch (dep_type)
11281 {
11282 case REG_DEP_ANTI:
11283 *cost = 0;
11284 return false;
11285
11286 case REG_DEP_TRUE:
11287 case REG_DEP_OUTPUT:
11288 if (recog_memoized (insn) >= 0
11289 && recog_memoized (dep) >= 0)
11290 {
11291 if (GET_CODE (PATTERN (insn)) == SET)
11292 {
11293 if (GET_MODE_CLASS
11294 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11295 || GET_MODE_CLASS
11296 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11297 {
11298 enum attr_type attr_type_insn = get_attr_type (insn);
11299 enum attr_type attr_type_dep = get_attr_type (dep);
11300
11301 /* By default all dependencies of the form
11302 s0 = s0 <op> s1
11303 s0 = s0 <op> s2
11304 have an extra latency of 1 cycle because
11305 of the input and output dependency in this
11306 case. However, this gets modeled as a true
11307 dependency, hence all these checks. */
11308 if (REG_P (SET_DEST (PATTERN (insn)))
11309 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11310 {
11311 /* FMACS is a special case where the dependent
11312 instruction can be issued 3 cycles before
11313 the normal latency in case of an output
11314 dependency. */
11315 if ((attr_type_insn == TYPE_FMACS
11316 || attr_type_insn == TYPE_FMACD)
11317 && (attr_type_dep == TYPE_FMACS
11318 || attr_type_dep == TYPE_FMACD))
11319 {
11320 if (dep_type == REG_DEP_OUTPUT)
11321 *cost = insn_default_latency (dep) - 3;
11322 else
11323 *cost = insn_default_latency (dep);
11324 return false;
11325 }
11326 else
11327 {
11328 if (dep_type == REG_DEP_OUTPUT)
11329 *cost = insn_default_latency (dep) + 1;
11330 else
11331 *cost = insn_default_latency (dep);
11332 }
11333 return false;
11334 }
11335 }
11336 }
11337 }
11338 break;
11339
11340 default:
11341 gcc_unreachable ();
11342 }
11343
11344 return true;
11345 }
11346
11347 /* Adjust cost hook for FA726TE. */
11348 static bool
11349 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11350 int * cost)
11351 {
11352 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11353 followed by a predicated one) has a penalty of 3. */
11354 if (dep_type == REG_DEP_TRUE
11355 && recog_memoized (insn) >= 0
11356 && recog_memoized (dep) >= 0
11357 && get_attr_conds (dep) == CONDS_SET)
11358 {
11359 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11360 if (get_attr_conds (insn) == CONDS_USE
11361 && get_attr_type (insn) != TYPE_BRANCH)
11362 {
11363 *cost = 3;
11364 return false;
11365 }
11366
11367 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11368 || get_attr_conds (insn) == CONDS_USE)
11369 {
11370 *cost = 0;
11371 return false;
11372 }
11373 }
11374
11375 return true;
11376 }
11377
11378 /* Implement TARGET_REGISTER_MOVE_COST.
11379
11380 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11381 it is typically more expensive than a single memory access. We set
11382 the cost to less than two memory accesses so that floating
11383 point to integer conversion does not go through memory. */
11384
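/* For illustration: with TARGET_32BIT the memory move cost below is 10, so
   the VFP<->core cost of 15 keeps such transfers cheaper than a store/load
   pair (cost 20) while still discouraging them relative to ordinary moves. */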
11385 int
11386 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11387 reg_class_t from, reg_class_t to)
11388 {
11389 if (TARGET_32BIT)
11390 {
11391 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11392 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11393 return 15;
11394 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11395 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11396 return 4;
11397 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11398 return 20;
11399 else
11400 return 2;
11401 }
11402 else
11403 {
11404 if (from == HI_REGS || to == HI_REGS)
11405 return 4;
11406 else
11407 return 2;
11408 }
11409 }
11410
11411 /* Implement TARGET_MEMORY_MOVE_COST. */
11412
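/* For illustration: on non-32BIT (Thumb-1) targets a QImode or HImode move
   costs 8, SImode costs 8 for LO_REGS and 16 otherwise, and DImode costs 16
   for LO_REGS and 32 otherwise. */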
11413 int
11414 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11415 bool in ATTRIBUTE_UNUSED)
11416 {
11417 if (TARGET_32BIT)
11418 return 10;
11419 else
11420 {
11421 if (GET_MODE_SIZE (mode) < 4)
11422 return 8;
11423 else
11424 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11425 }
11426 }
11427
11428 /* Vectorizer cost model implementation. */
11429
11430 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11431 static int
11432 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11433 tree vectype,
11434 int misalign ATTRIBUTE_UNUSED)
11435 {
11436 unsigned elements;
11437
11438 switch (type_of_cost)
11439 {
11440 case scalar_stmt:
11441 return current_tune->vec_costs->scalar_stmt_cost;
11442
11443 case scalar_load:
11444 return current_tune->vec_costs->scalar_load_cost;
11445
11446 case scalar_store:
11447 return current_tune->vec_costs->scalar_store_cost;
11448
11449 case vector_stmt:
11450 return current_tune->vec_costs->vec_stmt_cost;
11451
11452 case vector_load:
11453 return current_tune->vec_costs->vec_align_load_cost;
11454
11455 case vector_store:
11456 return current_tune->vec_costs->vec_store_cost;
11457
11458 case vec_to_scalar:
11459 return current_tune->vec_costs->vec_to_scalar_cost;
11460
11461 case scalar_to_vec:
11462 return current_tune->vec_costs->scalar_to_vec_cost;
11463
11464 case unaligned_load:
11465 case vector_gather_load:
11466 return current_tune->vec_costs->vec_unalign_load_cost;
11467
11468 case unaligned_store:
11469 case vector_scatter_store:
11470 return current_tune->vec_costs->vec_unalign_store_cost;
11471
11472 case cond_branch_taken:
11473 return current_tune->vec_costs->cond_taken_branch_cost;
11474
11475 case cond_branch_not_taken:
11476 return current_tune->vec_costs->cond_not_taken_branch_cost;
11477
11478 case vec_perm:
11479 case vec_promote_demote:
11480 return current_tune->vec_costs->vec_stmt_cost;
11481
11482 case vec_construct:
11483 elements = TYPE_VECTOR_SUBPARTS (vectype);
11484 return elements / 2 + 1;
11485
11486 default:
11487 gcc_unreachable ();
11488 }
11489 }
11490
11491 /* Implement targetm.vectorize.add_stmt_cost. */
11492
11493 static unsigned
11494 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11495 struct _stmt_vec_info *stmt_info, int misalign,
11496 enum vect_cost_model_location where)
11497 {
11498 unsigned *cost = (unsigned *) data;
11499 unsigned retval = 0;
11500
11501 if (flag_vect_cost_model)
11502 {
11503 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11504 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11505
11506 /* Statements in an inner loop relative to the loop being
11507 vectorized are weighted more heavily. The value here is
11508 arbitrary and could potentially be improved with analysis. */
11509 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11510 count *= 50; /* FIXME. */
11511
11512 retval = (unsigned) (count * stmt_cost);
11513 cost[where] += retval;
11514 }
11515
11516 return retval;
11517 }
11518
11519 /* Return true if and only if this insn can dual-issue only as older. */
11520 static bool
11521 cortexa7_older_only (rtx_insn *insn)
11522 {
11523 if (recog_memoized (insn) < 0)
11524 return false;
11525
11526 switch (get_attr_type (insn))
11527 {
11528 case TYPE_ALU_DSP_REG:
11529 case TYPE_ALU_SREG:
11530 case TYPE_ALUS_SREG:
11531 case TYPE_LOGIC_REG:
11532 case TYPE_LOGICS_REG:
11533 case TYPE_ADC_REG:
11534 case TYPE_ADCS_REG:
11535 case TYPE_ADR:
11536 case TYPE_BFM:
11537 case TYPE_REV:
11538 case TYPE_MVN_REG:
11539 case TYPE_SHIFT_IMM:
11540 case TYPE_SHIFT_REG:
11541 case TYPE_LOAD_BYTE:
11542 case TYPE_LOAD_4:
11543 case TYPE_STORE_4:
11544 case TYPE_FFARITHS:
11545 case TYPE_FADDS:
11546 case TYPE_FFARITHD:
11547 case TYPE_FADDD:
11548 case TYPE_FMOV:
11549 case TYPE_F_CVT:
11550 case TYPE_FCMPS:
11551 case TYPE_FCMPD:
11552 case TYPE_FCONSTS:
11553 case TYPE_FCONSTD:
11554 case TYPE_FMULS:
11555 case TYPE_FMACS:
11556 case TYPE_FMULD:
11557 case TYPE_FMACD:
11558 case TYPE_FDIVS:
11559 case TYPE_FDIVD:
11560 case TYPE_F_MRC:
11561 case TYPE_F_MRRC:
11562 case TYPE_F_FLAG:
11563 case TYPE_F_LOADS:
11564 case TYPE_F_STORES:
11565 return true;
11566 default:
11567 return false;
11568 }
11569 }
11570
11571 /* Return true if and only if this insn can dual-issue as younger. */
11572 static bool
11573 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11574 {
11575 if (recog_memoized (insn) < 0)
11576 {
11577 if (verbose > 5)
11578 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11579 return false;
11580 }
11581
11582 switch (get_attr_type (insn))
11583 {
11584 case TYPE_ALU_IMM:
11585 case TYPE_ALUS_IMM:
11586 case TYPE_LOGIC_IMM:
11587 case TYPE_LOGICS_IMM:
11588 case TYPE_EXTEND:
11589 case TYPE_MVN_IMM:
11590 case TYPE_MOV_IMM:
11591 case TYPE_MOV_REG:
11592 case TYPE_MOV_SHIFT:
11593 case TYPE_MOV_SHIFT_REG:
11594 case TYPE_BRANCH:
11595 case TYPE_CALL:
11596 return true;
11597 default:
11598 return false;
11599 }
11600 }
11601
11602
11603 /* Look for an instruction that can dual issue only as an older
11604 instruction, and move it in front of any instructions that can
11605 dual-issue as younger, while preserving the relative order of all
11606 other instructions in the ready list. This is a heuristic to help
11607 dual-issue in later cycles, by postponing issue of more flexible
11608 instructions. This heuristic may affect dual issue opportunities
11609 in the current cycle. */
11610 static void
11611 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11612 int *n_readyp, int clock)
11613 {
11614 int i;
11615 int first_older_only = -1, first_younger = -1;
11616
11617 if (verbose > 5)
11618 fprintf (file,
11619 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11620 clock,
11621 *n_readyp);
11622
11623 /* Traverse the ready list from the head (the instruction to issue
11624 first), looking for the first instruction that can issue as
11625 younger and the first instruction that can dual-issue only as
11626 older. */
11627 for (i = *n_readyp - 1; i >= 0; i--)
11628 {
11629 rtx_insn *insn = ready[i];
11630 if (cortexa7_older_only (insn))
11631 {
11632 first_older_only = i;
11633 if (verbose > 5)
11634 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11635 break;
11636 }
11637 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11638 first_younger = i;
11639 }
11640
11641 /* Nothing to reorder because either no younger insn found or insn
11642 that can dual-issue only as older appears before any insn that
11643 can dual-issue as younger. */
11644 if (first_younger == -1)
11645 {
11646 if (verbose > 5)
11647 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11648 return;
11649 }
11650
11651 /* Nothing to reorder because no older-only insn in the ready list. */
11652 if (first_older_only == -1)
11653 {
11654 if (verbose > 5)
11655 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11656 return;
11657 }
11658
11659 /* Move first_older_only insn before first_younger. */
11660 if (verbose > 5)
11661 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11662 INSN_UID(ready [first_older_only]),
11663 INSN_UID(ready [first_younger]));
11664 rtx_insn *first_older_only_insn = ready [first_older_only];
11665 for (i = first_older_only; i < first_younger; i++)
11666 {
11667 ready[i] = ready[i+1];
11668 }
11669
11670 ready[i] = first_older_only_insn;
11671 return;
11672 }
11673
11674 /* Implement TARGET_SCHED_REORDER. */
11675 static int
11676 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11677 int clock)
11678 {
11679 switch (arm_tune)
11680 {
11681 case TARGET_CPU_cortexa7:
11682 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11683 break;
11684 default:
11685 /* Do nothing for other cores. */
11686 break;
11687 }
11688
11689 return arm_issue_rate ();
11690 }
11691
11692 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11693 It corrects the value of COST based on the relationship between
11694 INSN and DEP through the dependence LINK. It returns the new
11695 value. There is a per-core adjust_cost hook to adjust scheduler costs
11696 and the per-core hook can choose to completely override the generic
11697 adjust_cost function. Only put bits of code into arm_adjust_cost that
11698 are common across all cores. */
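/* For illustration: with TARGET_THUMB1, a flag-setting ALU insn that feeds
   the cbranchsi4 insn consuming the flags gets a dependence cost of 0 below,
   so the scheduler keeps the pair adjacent and the comparison can be
   omitted. */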
11699 static int
11700 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11701 unsigned int)
11702 {
11703 rtx i_pat, d_pat;
11704
11705 /* When generating Thumb-1 code, we want to place flag-setting operations
11706 close to a conditional branch which depends on them, so that we can
11707 omit the comparison. */
11708 if (TARGET_THUMB1
11709 && dep_type == 0
11710 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11711 && recog_memoized (dep) >= 0
11712 && get_attr_conds (dep) == CONDS_SET)
11713 return 0;
11714
11715 if (current_tune->sched_adjust_cost != NULL)
11716 {
11717 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11718 return cost;
11719 }
11720
11721 /* XXX Is this strictly true? */
11722 if (dep_type == REG_DEP_ANTI
11723 || dep_type == REG_DEP_OUTPUT)
11724 return 0;
11725
11726 /* Call insns don't incur a stall, even if they follow a load. */
11727 if (dep_type == 0
11728 && CALL_P (insn))
11729 return 1;
11730
11731 if ((i_pat = single_set (insn)) != NULL
11732 && MEM_P (SET_SRC (i_pat))
11733 && (d_pat = single_set (dep)) != NULL
11734 && MEM_P (SET_DEST (d_pat)))
11735 {
11736 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11737 /* This is a load after a store; there is no conflict if the load reads
11738 from a cached area. Assume that loads from the stack and from the
11739 constant pool are cached, and that others will miss. This is a
11740 hack. */
11741
11742 if ((GET_CODE (src_mem) == SYMBOL_REF
11743 && CONSTANT_POOL_ADDRESS_P (src_mem))
11744 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11745 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11746 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11747 return 1;
11748 }
11749
11750 return cost;
11751 }
11752
11753 int
11754 arm_max_conditional_execute (void)
11755 {
11756 return max_insns_skipped;
11757 }
11758
11759 static int
11760 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11761 {
11762 if (TARGET_32BIT)
11763 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11764 else
11765 return (optimize > 0) ? 2 : 0;
11766 }
11767
11768 static int
11769 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11770 {
11771 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11772 }
11773
11774 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11775 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11776 sequences of non-executed instructions in IT blocks probably take the same
11777 amount of time as executed instructions (and the IT instruction itself takes
11778 space in icache). This function was experimentally determined to give good
11779 results on a popular embedded benchmark. */
11780
11781 static int
11782 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11783 {
11784 return (TARGET_32BIT && speed_p) ? 1
11785 : arm_default_branch_cost (speed_p, predictable_p);
11786 }
11787
11788 static int
11789 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11790 {
11791 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11792 }
11793
11794 static bool fp_consts_inited = false;
11795
11796 static REAL_VALUE_TYPE value_fp0;
11797
11798 static void
11799 init_fp_table (void)
11800 {
11801 REAL_VALUE_TYPE r;
11802
11803 r = REAL_VALUE_ATOF ("0", DFmode);
11804 value_fp0 = r;
11805 fp_consts_inited = true;
11806 }
11807
11808 /* Return TRUE if rtx X is a valid immediate FP constant. */
11809 int
11810 arm_const_double_rtx (rtx x)
11811 {
11812 const REAL_VALUE_TYPE *r;
11813
11814 if (!fp_consts_inited)
11815 init_fp_table ();
11816
11817 r = CONST_DOUBLE_REAL_VALUE (x);
11818 if (REAL_VALUE_MINUS_ZERO (*r))
11819 return 0;
11820
11821 if (real_equal (r, &value_fp0))
11822 return 1;
11823
11824 return 0;
11825 }
11826
11827 /* VFPv3 has a fairly wide range of representable immediates, formed from
11828 "quarter-precision" floating-point values. These can be evaluated using this
11829 formula (with ^ for exponentiation):
11830
11831 -1^s * n * 2^-r
11832
11833 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11834 16 <= n <= 31 and 0 <= r <= 7.
11835
11836 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11837
11838 - A (most-significant) is the sign bit.
11839 - BCD are the exponent (encoded as r XOR 3).
11840 - EFGH are the mantissa (encoded as n - 16).
11841 */
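/* For illustration: 1.0 is 16 * 2^-4 (s = 0, n = 16, r = 4), which maps to
   the 8-bit index (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70. */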
11842
11843 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11844 fconst[sd] instruction, or -1 if X isn't suitable. */
11845 static int
11846 vfp3_const_double_index (rtx x)
11847 {
11848 REAL_VALUE_TYPE r, m;
11849 int sign, exponent;
11850 unsigned HOST_WIDE_INT mantissa, mant_hi;
11851 unsigned HOST_WIDE_INT mask;
11852 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11853 bool fail;
11854
11855 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11856 return -1;
11857
11858 r = *CONST_DOUBLE_REAL_VALUE (x);
11859
11860 /* We can't represent these things, so detect them first. */
11861 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11862 return -1;
11863
11864 /* Extract sign, exponent and mantissa. */
11865 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11866 r = real_value_abs (&r);
11867 exponent = REAL_EXP (&r);
11868 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11869 highest (sign) bit, with a fixed binary point at bit point_pos.
11870 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11871 bits for the mantissa, this may fail (low bits would be lost). */
11872 real_ldexp (&m, &r, point_pos - exponent);
11873 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11874 mantissa = w.elt (0);
11875 mant_hi = w.elt (1);
11876
11877 /* If there are bits set in the low part of the mantissa, we can't
11878 represent this value. */
11879 if (mantissa != 0)
11880 return -1;
11881
11882 /* Now make it so that mantissa contains the most-significant bits, and move
11883 the point_pos to indicate that the least-significant bits have been
11884 discarded. */
11885 point_pos -= HOST_BITS_PER_WIDE_INT;
11886 mantissa = mant_hi;
11887
11888 /* We can permit four significant bits of mantissa only, plus a high bit
11889 which is always 1. */
11890 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11891 if ((mantissa & mask) != 0)
11892 return -1;
11893
11894 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11895 mantissa >>= point_pos - 5;
11896
11897 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11898 floating-point immediate zero with Neon using an integer-zero load, but
11899 that case is handled elsewhere.) */
11900 if (mantissa == 0)
11901 return -1;
11902
11903 gcc_assert (mantissa >= 16 && mantissa <= 31);
11904
11905 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11906 normalized significands are in the range [1, 2). (Our mantissa is shifted
11907 left 4 places at this point relative to normalized IEEE754 values). GCC
11908 internally uses [0.5, 1) (see real.c), so the exponent returned from
11909 REAL_EXP must be altered. */
11910 exponent = 5 - exponent;
11911
11912 if (exponent < 0 || exponent > 7)
11913 return -1;
11914
11915 /* Sign, mantissa and exponent are now in the correct form to plug into the
11916 formula described in the comment above. */
11917 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11918 }
11919
11920 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11921 int
11922 vfp3_const_double_rtx (rtx x)
11923 {
11924 if (!TARGET_VFP3)
11925 return 0;
11926
11927 return vfp3_const_double_index (x) != -1;
11928 }
11929
11930 /* Recognize immediates which can be used in various Neon instructions. Legal
11931 immediates are described by the following table (for VMVN variants, the
11932 bitwise inverse of the constant shown is recognized. In either case, VMOV
11933 is output and the correct instruction to use for a given constant is chosen
11934 by the assembler). The constant shown is replicated across all elements of
11935 the destination vector.
11936
11937 insn elems variant constant (binary)
11938 ---- ----- ------- -----------------
11939 vmov i32 0 00000000 00000000 00000000 abcdefgh
11940 vmov i32 1 00000000 00000000 abcdefgh 00000000
11941 vmov i32 2 00000000 abcdefgh 00000000 00000000
11942 vmov i32 3 abcdefgh 00000000 00000000 00000000
11943 vmov i16 4 00000000 abcdefgh
11944 vmov i16 5 abcdefgh 00000000
11945 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11946 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11947 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11948 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11949 vmvn i16 10 00000000 abcdefgh
11950 vmvn i16 11 abcdefgh 00000000
11951 vmov i32 12 00000000 00000000 abcdefgh 11111111
11952 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11953 vmov i32 14 00000000 abcdefgh 11111111 11111111
11954 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11955 vmov i8 16 abcdefgh
11956 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11957 eeeeeeee ffffffff gggggggg hhhhhhhh
11958 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11959 vmov f32 19 00000000 00000000 00000000 00000000
11960
11961 For case 18, B = !b. Representable values are exactly those accepted by
11962 vfp3_const_double_index, but are output as floating-point numbers rather
11963 than indices.
11964
11965 For case 19, we will change it to vmov.i32 when assembling.
11966
11967 Variants 0-5 (inclusive) may also be used as immediates for the second
11968 operand of VORR/VBIC instructions.
11969
11970 The INVERSE argument causes the bitwise inverse of the given operand to be
11971 recognized instead (used for recognizing legal immediates for the VAND/VORN
11972 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11973 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11974 output, rather than the real insns vbic/vorr).
11975
11976 INVERSE makes no difference to the recognition of float vectors.
11977
11978 The return value is the variant of immediate as shown in the above table, or
11979 -1 if the given value doesn't match any of the listed patterns.
11980 */
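/* For illustration: a V4SImode constant with every element equal to
   0x000000ab matches variant 0 (vmov.i32 with abcdefgh == 0xab), and one
   with every element equal to 0x0000ab00 matches variant 1. */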
11981 static int
11982 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11983 rtx *modconst, int *elementwidth)
11984 {
11985 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11986 matches = 1; \
11987 for (i = 0; i < idx; i += (STRIDE)) \
11988 if (!(TEST)) \
11989 matches = 0; \
11990 if (matches) \
11991 { \
11992 immtype = (CLASS); \
11993 elsize = (ELSIZE); \
11994 break; \
11995 }
11996
11997 unsigned int i, elsize = 0, idx = 0, n_elts;
11998 unsigned int innersize;
11999 unsigned char bytes[16];
12000 int immtype = -1, matches;
12001 unsigned int invmask = inverse ? 0xff : 0;
12002 bool vector = GET_CODE (op) == CONST_VECTOR;
12003
12004 if (vector)
12005 n_elts = CONST_VECTOR_NUNITS (op);
12006 else
12007 {
12008 n_elts = 1;
12009 gcc_assert (mode != VOIDmode);
12010 }
12011
12012 innersize = GET_MODE_UNIT_SIZE (mode);
12013
12014 /* Vectors of float constants. */
12015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12016 {
12017 rtx el0 = CONST_VECTOR_ELT (op, 0);
12018
12019 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12020 return -1;
12021
12022 /* FP16 vectors cannot be represented. */
12023 if (GET_MODE_INNER (mode) == HFmode)
12024 return -1;
12025
12026 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12027 are distinct in this context. */
12028 if (!const_vec_duplicate_p (op))
12029 return -1;
12030
12031 if (modconst)
12032 *modconst = CONST_VECTOR_ELT (op, 0);
12033
12034 if (elementwidth)
12035 *elementwidth = 0;
12036
12037 if (el0 == CONST0_RTX (GET_MODE (el0)))
12038 return 19;
12039 else
12040 return 18;
12041 }
12042
12043 /* The tricks done in the code below apply for little-endian vector layout.
12044 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12045 FIXME: Implement logic for big-endian vectors. */
12046 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12047 return -1;
12048
12049 /* Splat vector constant out into a byte vector. */
12050 for (i = 0; i < n_elts; i++)
12051 {
12052 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12053 unsigned HOST_WIDE_INT elpart;
12054
12055 gcc_assert (CONST_INT_P (el));
12056 elpart = INTVAL (el);
12057
12058 for (unsigned int byte = 0; byte < innersize; byte++)
12059 {
12060 bytes[idx++] = (elpart & 0xff) ^ invmask;
12061 elpart >>= BITS_PER_UNIT;
12062 }
12063 }
12064
12065 /* Sanity check. */
12066 gcc_assert (idx == GET_MODE_SIZE (mode));
12067
12068 do
12069 {
12070 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12071 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12072
12073 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12074 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12075
12076 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12077 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12078
12079 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12080 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12081
12082 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12083
12084 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12085
12086 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12087 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12088
12089 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12090 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12091
12092 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12093 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12094
12095 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12096 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12097
12098 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12099
12100 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12101
12102 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12103 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12104
12105 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12106 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12107
12108 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12109 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12110
12111 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12112 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12113
12114 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12115
12116 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12117 && bytes[i] == bytes[(i + 8) % idx]);
12118 }
12119 while (0);
12120
12121 if (immtype == -1)
12122 return -1;
12123
12124 if (elementwidth)
12125 *elementwidth = elsize;
12126
12127 if (modconst)
12128 {
12129 unsigned HOST_WIDE_INT imm = 0;
12130
12131 /* Un-invert bytes of recognized vector, if necessary. */
12132 if (invmask != 0)
12133 for (i = 0; i < idx; i++)
12134 bytes[i] ^= invmask;
12135
12136 if (immtype == 17)
12137 {
12138 /* FIXME: Broken on 32-bit H_W_I hosts. */
12139 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12140
12141 for (i = 0; i < 8; i++)
12142 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12143 << (i * BITS_PER_UNIT);
12144
12145 *modconst = GEN_INT (imm);
12146 }
12147 else
12148 {
12149 unsigned HOST_WIDE_INT imm = 0;
12150
12151 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12152 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12153
12154 *modconst = GEN_INT (imm);
12155 }
12156 }
12157
12158 return immtype;
12159 #undef CHECK
12160 }
12161
12162 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12163 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12164 float elements), and a modified constant (whatever should be output for a
12165 VMOV) in *MODCONST. */
12166
12167 int
12168 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12169 rtx *modconst, int *elementwidth)
12170 {
12171 rtx tmpconst;
12172 int tmpwidth;
12173 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12174
12175 if (retval == -1)
12176 return 0;
12177
12178 if (modconst)
12179 *modconst = tmpconst;
12180
12181 if (elementwidth)
12182 *elementwidth = tmpwidth;
12183
12184 return 1;
12185 }
12186
12187 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12188 the immediate is valid, write a constant suitable for using as an operand
12189 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12190 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12191
12192 int
12193 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12194 rtx *modconst, int *elementwidth)
12195 {
12196 rtx tmpconst;
12197 int tmpwidth;
12198 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12199
12200 if (retval < 0 || retval > 5)
12201 return 0;
12202
12203 if (modconst)
12204 *modconst = tmpconst;
12205
12206 if (elementwidth)
12207 *elementwidth = tmpwidth;
12208
12209 return 1;
12210 }
12211
12212 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12213 the immediate is valid, write a constant suitable for using as an operand
12214 to VSHR/VSHL to *MODCONST and the corresponding element width to
12215 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12216 shift, since the two have different immediate ranges. */
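/* For illustration: for a V8QImode shift-count vector the element size is
   8 bits, so a valid VSHL immediate lies in 0..7, a valid VSHR immediate in
   1..8, and every element of the count vector must be identical. */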
12217
12218 int
12219 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12220 rtx *modconst, int *elementwidth,
12221 bool isleftshift)
12222 {
12223 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12224 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12225 unsigned HOST_WIDE_INT last_elt = 0;
12226 unsigned HOST_WIDE_INT maxshift;
12227
12228 /* All elements of the shift-count vector must be the same. */
12229 for (i = 0; i < n_elts; i++)
12230 {
12231 rtx el = CONST_VECTOR_ELT (op, i);
12232 unsigned HOST_WIDE_INT elpart;
12233
12234 if (CONST_INT_P (el))
12235 elpart = INTVAL (el);
12236 else if (CONST_DOUBLE_P (el))
12237 return 0;
12238 else
12239 gcc_unreachable ();
12240
12241 if (i != 0 && elpart != last_elt)
12242 return 0;
12243
12244 last_elt = elpart;
12245 }
12246
12247 /* Shift less than element size. */
12248 maxshift = innersize * 8;
12249
12250 if (isleftshift)
12251 {
12252 /* Left shift immediate value can be from 0 to <size>-1. */
12253 if (last_elt >= maxshift)
12254 return 0;
12255 }
12256 else
12257 {
12258 /* Right shift immediate value can be from 1 to <size>. */
12259 if (last_elt == 0 || last_elt > maxshift)
12260 return 0;
12261 }
12262
12263 if (elementwidth)
12264 *elementwidth = innersize * 8;
12265
12266 if (modconst)
12267 *modconst = CONST_VECTOR_ELT (op, 0);
12268
12269 return 1;
12270 }
12271
12272 /* Return a string suitable for output of Neon immediate logic operation
12273 MNEM. */
12274
12275 char *
12276 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12277 int inverse, int quad)
12278 {
12279 int width, is_valid;
12280 static char templ[40];
12281
12282 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12283
12284 gcc_assert (is_valid != 0);
12285
12286 if (quad)
12287 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12288 else
12289 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12290
12291 return templ;
12292 }
12293
12294 /* Return a string suitable for output of Neon immediate shift operation
12295 (VSHR or VSHL) MNEM. */
12296
12297 char *
12298 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12299 machine_mode mode, int quad,
12300 bool isleftshift)
12301 {
12302 int width, is_valid;
12303 static char templ[40];
12304
12305 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12306 gcc_assert (is_valid != 0);
12307
12308 if (quad)
12309 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12310 else
12311 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12312
12313 return templ;
12314 }
12315
12316 /* Output a sequence of pairwise operations to implement a reduction.
12317 NOTE: We do "too much work" here, because pairwise operations work on two
12318 registers-worth of operands in one go. Unfortunately, I don't think we can
12319 exploit those extra calculations to do the full operation in fewer steps.
12320 Although all vector elements of the result but the first are ignored, we
12321 actually calculate the same result in each of the elements. An alternative
12322 such as initially loading a vector with zero to use as each of the second
12323 operands would use up an additional register and take an extra instruction,
12324 for no particular gain. */
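/* For illustration: for a V4SImode operand the loop below emits two
   pairwise operations (i == 2, then i == 1), the final one writing its
   result directly to OP0. */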
12325
12326 void
12327 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12328 rtx (*reduc) (rtx, rtx, rtx))
12329 {
12330 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12331 rtx tmpsum = op1;
12332
12333 for (i = parts / 2; i >= 1; i /= 2)
12334 {
12335 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12336 emit_insn (reduc (dest, tmpsum, tmpsum));
12337 tmpsum = dest;
12338 }
12339 }
12340
12341 /* If VALS is a vector constant that can be loaded into a register
12342 using VDUP, generate instructions to do so and return an RTX to
12343 assign to the register. Otherwise return NULL_RTX. */
12344
12345 static rtx
12346 neon_vdup_constant (rtx vals)
12347 {
12348 machine_mode mode = GET_MODE (vals);
12349 machine_mode inner_mode = GET_MODE_INNER (mode);
12350 rtx x;
12351
12352 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12353 return NULL_RTX;
12354
12355 if (!const_vec_duplicate_p (vals, &x))
12356 /* The elements are not all the same. We could handle repeating
12357 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12358 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12359 vdup.i16). */
12360 return NULL_RTX;
12361
12362 /* We can load this constant by using VDUP and a constant in a
12363 single ARM register. This will be cheaper than a vector
12364 load. */
12365
12366 x = copy_to_mode_reg (inner_mode, x);
12367 return gen_vec_duplicate (mode, x);
12368 }
12369
12370 /* Generate code to load VALS, which is a PARALLEL containing only
12371 constants (for vec_init) or CONST_VECTOR, efficiently into a
12372 register. Returns an RTX to copy into the register, or NULL_RTX
12373 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12374
12375 rtx
12376 neon_make_constant (rtx vals)
12377 {
12378 machine_mode mode = GET_MODE (vals);
12379 rtx target;
12380 rtx const_vec = NULL_RTX;
12381 int n_elts = GET_MODE_NUNITS (mode);
12382 int n_const = 0;
12383 int i;
12384
12385 if (GET_CODE (vals) == CONST_VECTOR)
12386 const_vec = vals;
12387 else if (GET_CODE (vals) == PARALLEL)
12388 {
12389 /* A CONST_VECTOR must contain only CONST_INTs and
12390 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12391 Only store valid constants in a CONST_VECTOR. */
12392 for (i = 0; i < n_elts; ++i)
12393 {
12394 rtx x = XVECEXP (vals, 0, i);
12395 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12396 n_const++;
12397 }
12398 if (n_const == n_elts)
12399 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12400 }
12401 else
12402 gcc_unreachable ();
12403
12404 if (const_vec != NULL
12405 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12406 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12407 return const_vec;
12408 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12409 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12410 pipeline cycle; creating the constant takes one or two ARM
12411 pipeline cycles. */
12412 return target;
12413 else if (const_vec != NULL_RTX)
12414 /* Load from constant pool. On Cortex-A8 this takes two cycles
12415 (for either double or quad vectors). We cannot take advantage
12416 of single-cycle VLD1 because we need a PC-relative addressing
12417 mode. */
12418 return const_vec;
12419 else
12420 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12421 We cannot construct an initializer. */
12422 return NULL_RTX;
12423 }
12424
12425 /* Initialize vector TARGET to VALS. */
12426
12427 void
12428 neon_expand_vector_init (rtx target, rtx vals)
12429 {
12430 machine_mode mode = GET_MODE (target);
12431 machine_mode inner_mode = GET_MODE_INNER (mode);
12432 int n_elts = GET_MODE_NUNITS (mode);
12433 int n_var = 0, one_var = -1;
12434 bool all_same = true;
12435 rtx x, mem;
12436 int i;
12437
12438 for (i = 0; i < n_elts; ++i)
12439 {
12440 x = XVECEXP (vals, 0, i);
12441 if (!CONSTANT_P (x))
12442 ++n_var, one_var = i;
12443
12444 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12445 all_same = false;
12446 }
12447
12448 if (n_var == 0)
12449 {
12450 rtx constant = neon_make_constant (vals);
12451 if (constant != NULL_RTX)
12452 {
12453 emit_move_insn (target, constant);
12454 return;
12455 }
12456 }
12457
12458 /* Splat a single non-constant element if we can. */
12459 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12460 {
12461 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12462 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12463 return;
12464 }
12465
12466 /* One field is non-constant. Load constant then overwrite varying
12467 field. This is more efficient than using the stack. */
12468 if (n_var == 1)
12469 {
12470 rtx copy = copy_rtx (vals);
12471 rtx merge_mask = GEN_INT (1 << one_var);
12472
12473 /* Load constant part of vector, substitute neighboring value for
12474 varying element. */
12475 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12476 neon_expand_vector_init (target, copy);
12477
12478 /* Insert variable. */
12479 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12480 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12481 return;
12482 }
12483
12484 /* Construct the vector in memory one field at a time
12485 and load the whole vector. */
12486 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12487 for (i = 0; i < n_elts; i++)
12488 emit_move_insn (adjust_address_nv (mem, inner_mode,
12489 i * GET_MODE_SIZE (inner_mode)),
12490 XVECEXP (vals, 0, i));
12491 emit_move_insn (target, mem);
12492 }
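
/* Rough sketch of the strategies above (register and lane choices are
   arbitrary examples):

     all elements equal and variable       ->  vdup.32  q0, r1
     one variable element, e.g. {1,2,3,x}  ->  load the constant {1,2,3,1}
                                               (the neighbouring value is
                                               substituted), then insert x
                                               with a lane write such as
                                               vmov.32  d1[1], r1
     several variable elements             ->  element-by-element stores to a
                                               stack slot, then one
                                               full-vector load.  */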
12493
12494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
12495 an error, using DESC to describe the operand, if it doesn't.  EXP indicates
12496 the source location, which includes the inlining history for intrinsics.  */

12497
12498 static void
12499 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12500 const_tree exp, const char *desc)
12501 {
12502 HOST_WIDE_INT lane;
12503
12504 gcc_assert (CONST_INT_P (operand));
12505
12506 lane = INTVAL (operand);
12507
12508 if (lane < low || lane >= high)
12509 {
12510 if (exp)
12511 error ("%K%s %wd out of range %wd - %wd",
12512 exp, desc, lane, low, high - 1);
12513 else
12514 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12515 }
12516 }
12517
12518 /* Bounds-check lanes. */
12519
12520 void
12521 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12522 const_tree exp)
12523 {
12524 bounds_check (operand, low, high, exp, "lane");
12525 }
12526
12527 /* Bounds-check constants. */
12528
12529 void
12530 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12531 {
12532 bounds_check (operand, low, high, NULL_TREE, "constant");
12533 }
12534
/* Return the number of bits in one element of vector mode MODE.  */
12535 HOST_WIDE_INT
12536 neon_element_bits (machine_mode mode)
12537 {
12538 return GET_MODE_UNIT_BITSIZE (mode);
12539 }
12540
12541 \f
12542 /* Predicates for `match_operand' and `match_operator'. */
12543
12544 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12545 WB is true if full writeback address modes are allowed and is false
12546 if limited writeback address modes (POST_INC and PRE_DEC) are
12547 allowed. */
12548
12549 int
12550 arm_coproc_mem_operand (rtx op, bool wb)
12551 {
12552 rtx ind;
12553
12554 /* Reject eliminable registers. */
12555 if (! (reload_in_progress || reload_completed || lra_in_progress)
12556 && ( reg_mentioned_p (frame_pointer_rtx, op)
12557 || reg_mentioned_p (arg_pointer_rtx, op)
12558 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12559 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12560 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12561 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12562 return FALSE;
12563
12564 /* Constants are converted into offsets from labels. */
12565 if (!MEM_P (op))
12566 return FALSE;
12567
12568 ind = XEXP (op, 0);
12569
12570 if (reload_completed
12571 && (GET_CODE (ind) == LABEL_REF
12572 || (GET_CODE (ind) == CONST
12573 && GET_CODE (XEXP (ind, 0)) == PLUS
12574 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12575 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12576 return TRUE;
12577
12578 /* Match: (mem (reg)). */
12579 if (REG_P (ind))
12580 return arm_address_register_rtx_p (ind, 0);
12581
12582 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12583 acceptable in any case (subject to verification by
12584 arm_address_register_rtx_p). We need WB to be true to accept
12585 PRE_INC and POST_DEC. */
12586 if (GET_CODE (ind) == POST_INC
12587 || GET_CODE (ind) == PRE_DEC
12588 || (wb
12589 && (GET_CODE (ind) == PRE_INC
12590 || GET_CODE (ind) == POST_DEC)))
12591 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12592
12593 if (wb
12594 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12595 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12596 && GET_CODE (XEXP (ind, 1)) == PLUS
12597 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12598 ind = XEXP (ind, 1);
12599
12600 /* Match:
12601 (plus (reg)
12602 (const)). */
12603 if (GET_CODE (ind) == PLUS
12604 && REG_P (XEXP (ind, 0))
12605 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12606 && CONST_INT_P (XEXP (ind, 1))
12607 && INTVAL (XEXP (ind, 1)) > -1024
12608 && INTVAL (XEXP (ind, 1)) < 1024
12609 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12610 return TRUE;
12611
12612 return FALSE;
12613 }
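
/* For illustration, the addresses accepted above correspond roughly to the
   coprocessor (VFP) load/store addressing modes, e.g. (register names are
   arbitrary):

     (mem (reg r4))                          vldr  d0, [r4]
     (mem (plus (reg r4) (const_int 8)))     vldr  d0, [r4, #8]
     (mem (post_inc (reg r4)))               write-back form

   with the PLUS offset restricted to a multiple of 4 strictly between -1024
   and 1024, matching the scaled 8-bit offset field of these instructions.  */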
12614
12615 /* Return TRUE if OP is a memory operand from/to which we can load or store
12616 a vector.  STRICT controls whether eliminable registers are rejected.
12617 TYPE is one of the following values:
12618  0 - Vector load/store (vldr)
12619  1 - Core registers (ldm)
12620  2 - Element/structure loads (vld1)  */
12621 int
12622 neon_vector_mem_operand (rtx op, int type, bool strict)
12623 {
12624 rtx ind;
12625
12626 /* Reject eliminable registers. */
12627 if (strict && ! (reload_in_progress || reload_completed)
12628 && (reg_mentioned_p (frame_pointer_rtx, op)
12629 || reg_mentioned_p (arg_pointer_rtx, op)
12630 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12631 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12632 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12633 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12634 return FALSE;
12635
12636 /* Constants are converted into offsets from labels. */
12637 if (!MEM_P (op))
12638 return FALSE;
12639
12640 ind = XEXP (op, 0);
12641
12642 if (reload_completed
12643 && (GET_CODE (ind) == LABEL_REF
12644 || (GET_CODE (ind) == CONST
12645 && GET_CODE (XEXP (ind, 0)) == PLUS
12646 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12647 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12648 return TRUE;
12649
12650 /* Match: (mem (reg)). */
12651 if (REG_P (ind))
12652 return arm_address_register_rtx_p (ind, 0);
12653
12654 /* Allow post-increment with Neon registers. */
12655 if ((type != 1 && GET_CODE (ind) == POST_INC)
12656 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12657 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12658
12659 /* Allow post-increment by register for VLDn.  */
12660 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12661 && GET_CODE (XEXP (ind, 1)) == PLUS
12662 && REG_P (XEXP (XEXP (ind, 1), 1)))
12663 return true;
12664
12665 /* Match:
12666 (plus (reg)
12667 (const)). */
12668 if (type == 0
12669 && GET_CODE (ind) == PLUS
12670 && REG_P (XEXP (ind, 0))
12671 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12672 && CONST_INT_P (XEXP (ind, 1))
12673 && INTVAL (XEXP (ind, 1)) > -1024
12674 /* For quad modes, we restrict the constant offset to be slightly less
12675 than what the instruction format permits. We have no such constraint
12676 on double mode offsets. (This must match arm_legitimate_index_p.) */
12677 && (INTVAL (XEXP (ind, 1))
12678 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12679 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12680 return TRUE;
12681
12682 return FALSE;
12683 }
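
/* Illustrative (not exhaustive) examples of the address forms accepted for
   each TYPE:

     TYPE 0 (vldr):  [rn] and [rn, #off] with a multiple-of-4 offset in the
                     range described above, plus post-increment/pre-decrement;
     TYPE 1 (ldm):   [rn] only (besides label-based addresses after reload);
     TYPE 2 (vld1):  [rn], the post-increment form [rn]!, and post-increment
                     by a register, e.g.  vld1.32 {d0-d1}, [rn], rm.  */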
12684
12685 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12686 type. */
12687 int
12688 neon_struct_mem_operand (rtx op)
12689 {
12690 rtx ind;
12691
12692 /* Reject eliminable registers. */
12693 if (! (reload_in_progress || reload_completed)
12694 && ( reg_mentioned_p (frame_pointer_rtx, op)
12695 || reg_mentioned_p (arg_pointer_rtx, op)
12696 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12697 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12698 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12699 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12700 return FALSE;
12701
12702 /* Constants are converted into offsets from labels. */
12703 if (!MEM_P (op))
12704 return FALSE;
12705
12706 ind = XEXP (op, 0);
12707
12708 if (reload_completed
12709 && (GET_CODE (ind) == LABEL_REF
12710 || (GET_CODE (ind) == CONST
12711 && GET_CODE (XEXP (ind, 0)) == PLUS
12712 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12713 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12714 return TRUE;
12715
12716 /* Match: (mem (reg)). */
12717 if (REG_P (ind))
12718 return arm_address_register_rtx_p (ind, 0);
12719
12720 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12721 if (GET_CODE (ind) == POST_INC
12722 || GET_CODE (ind) == PRE_DEC)
12723 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12724
12725 return FALSE;
12726 }
12727
12728 /* Prepare the operands for the VCMLA by lane instruction such that the right
12729 register number is selected.  This instruction is special in that it always
12730 requires a D register; however, there is a choice to be made between Dn[0],
12731 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
12732
12733 The VCMLA by lane instruction always selects two values.  For instance, given
12734 D0 and a V2SF, the only valid index is 0, as the values in S0 and S1 will be
12735 used by the instruction.  Given V4SF, however, indices 0 and 1 are both
12736 valid, as either D0[0] or D1[0] may be selected.
12737
12738 This function centralizes that information based on OPERANDS: OPERANDS[3]
12739 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
12740 updated to contain the right index.  */
12741
12742 rtx *
12743 neon_vcmla_lane_prepare_operands (rtx *operands)
12744 {
12745 int lane = INTVAL (operands[4]);
12746 machine_mode constmode = SImode;
12747 machine_mode mode = GET_MODE (operands[3]);
12748 int regno = REGNO (operands[3]);
12749 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
12750 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
12751 {
12752 operands[3] = gen_int_mode (regno + 1, constmode);
12753 operands[4]
12754 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
12755 }
12756 else
12757 {
12758 operands[3] = gen_int_mode (regno, constmode);
12759 operands[4] = gen_int_mode (lane, constmode);
12760 }
12761 return operands;
12762 }
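
/* Worked example of the mapping above (a sketch; it assumes the usual VFP
   register overlay where Q0 = D0/D1 = S0..S3): for OPERANDS[3] in Q0 with
   V4SF mode and lane index 1, GET_MODE_NUNITS (mode) / 4 is 1, so the lane
   lives in the second D register; OPERANDS[3] becomes the constant selecting
   D1 and OPERANDS[4] becomes 0.  With lane index 0 the selection stays on D0
   and the index is unchanged.  */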
12763
12764
12765 /* Return true if X is a register that will be eliminated later on. */
12766 int
12767 arm_eliminable_register (rtx x)
12768 {
12769 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12770 || REGNO (x) == ARG_POINTER_REGNUM
12771 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12772 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12773 }
12774
12775 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12776 coprocessor registers.  Otherwise return NO_REGS.  */
12777
12778 enum reg_class
12779 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12780 {
12781 if (mode == HFmode)
12782 {
12783 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12784 return GENERAL_REGS;
12785 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12786 return NO_REGS;
12787 return GENERAL_REGS;
12788 }
12789
12790 /* The neon move patterns handle all legitimate vector and struct
12791 addresses. */
12792 if (TARGET_NEON
12793 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12794 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12795 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12796 || VALID_NEON_STRUCT_MODE (mode)))
12797 return NO_REGS;
12798
12799 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12800 return NO_REGS;
12801
12802 return GENERAL_REGS;
12803 }
12804
12805 /* Values which must be returned in the most-significant end of the return
12806 register. */
12807
12808 static bool
12809 arm_return_in_msb (const_tree valtype)
12810 {
12811 return (TARGET_AAPCS_BASED
12812 && BYTES_BIG_ENDIAN
12813 && (AGGREGATE_TYPE_P (valtype)
12814 || TREE_CODE (valtype) == COMPLEX_TYPE
12815 || FIXED_POINT_TYPE_P (valtype)));
12816 }
12817
12818 /* Return TRUE if X references a SYMBOL_REF. */
12819 int
12820 symbol_mentioned_p (rtx x)
12821 {
12822 const char * fmt;
12823 int i;
12824
12825 if (GET_CODE (x) == SYMBOL_REF)
12826 return 1;
12827
12828 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12829 are constant offsets, not symbols. */
12830 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12831 return 0;
12832
12833 fmt = GET_RTX_FORMAT (GET_CODE (x));
12834
12835 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12836 {
12837 if (fmt[i] == 'E')
12838 {
12839 int j;
12840
12841 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12842 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12843 return 1;
12844 }
12845 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12846 return 1;
12847 }
12848
12849 return 0;
12850 }
12851
12852 /* Return TRUE if X references a LABEL_REF. */
12853 int
12854 label_mentioned_p (rtx x)
12855 {
12856 const char * fmt;
12857 int i;
12858
12859 if (GET_CODE (x) == LABEL_REF)
12860 return 1;
12861
12862 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12863 instruction, but they are constant offsets, not symbols. */
12864 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12865 return 0;
12866
12867 fmt = GET_RTX_FORMAT (GET_CODE (x));
12868 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12869 {
12870 if (fmt[i] == 'E')
12871 {
12872 int j;
12873
12874 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12875 if (label_mentioned_p (XVECEXP (x, i, j)))
12876 return 1;
12877 }
12878 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12879 return 1;
12880 }
12881
12882 return 0;
12883 }
12884
/* Return 1 if X is an UNSPEC_TLS reference, either directly or wrapped in a
   CONST.  */
12885 int
12886 tls_mentioned_p (rtx x)
12887 {
12888 switch (GET_CODE (x))
12889 {
12890 case CONST:
12891 return tls_mentioned_p (XEXP (x, 0));
12892
12893 case UNSPEC:
12894 if (XINT (x, 1) == UNSPEC_TLS)
12895 return 1;
12896
12897 /* Fall through. */
12898 default:
12899 return 0;
12900 }
12901 }
12902
12903 /* Must not copy any rtx that uses a pc-relative address.
12904 Also, disallow copying of load-exclusive instructions that
12905 may appear after splitting of compare-and-swap-style operations
12906 so as to prevent those loops from being transformed away from their
12907 canonical forms (see PR 69904). */
12908
12909 static bool
12910 arm_cannot_copy_insn_p (rtx_insn *insn)
12911 {
12912 /* The tls call insn cannot be copied, as it is paired with a data
12913 word. */
12914 if (recog_memoized (insn) == CODE_FOR_tlscall)
12915 return true;
12916
12917 subrtx_iterator::array_type array;
12918 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12919 {
12920 const_rtx x = *iter;
12921 if (GET_CODE (x) == UNSPEC
12922 && (XINT (x, 1) == UNSPEC_PIC_BASE
12923 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12924 return true;
12925 }
12926
12927 rtx set = single_set (insn);
12928 if (set)
12929 {
12930 rtx src = SET_SRC (set);
12931 if (GET_CODE (src) == ZERO_EXTEND)
12932 src = XEXP (src, 0);
12933
12934 /* Catch the load-exclusive and load-acquire operations. */
12935 if (GET_CODE (src) == UNSPEC_VOLATILE
12936 && (XINT (src, 1) == VUNSPEC_LL
12937 || XINT (src, 1) == VUNSPEC_LAX))
12938 return true;
12939 }
12940 return false;
12941 }
12942
/* Map a min/max rtx code to the comparison code under which its first
   operand is selected (SMAX -> GE, SMIN -> LE, UMIN -> LEU, UMAX -> GEU).  */
12943 enum rtx_code
12944 minmax_code (rtx x)
12945 {
12946 enum rtx_code code = GET_CODE (x);
12947
12948 switch (code)
12949 {
12950 case SMAX:
12951 return GE;
12952 case SMIN:
12953 return LE;
12954 case UMIN:
12955 return LEU;
12956 case UMAX:
12957 return GEU;
12958 default:
12959 gcc_unreachable ();
12960 }
12961 }
12962
12963 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12964
12965 bool
12966 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12967 int *mask, bool *signed_sat)
12968 {
12969 /* The high bound must be a power of two minus one. */
12970 int log = exact_log2 (INTVAL (hi_bound) + 1);
12971 if (log == -1)
12972 return false;
12973
12974 /* The low bound is either zero (for usat) or one less than the
12975 negation of the high bound (for ssat). */
12976 if (INTVAL (lo_bound) == 0)
12977 {
12978 if (mask)
12979 *mask = log;
12980 if (signed_sat)
12981 *signed_sat = false;
12982
12983 return true;
12984 }
12985
12986 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12987 {
12988 if (mask)
12989 *mask = log + 1;
12990 if (signed_sat)
12991 *signed_sat = true;
12992
12993 return true;
12994 }
12995
12996 return false;
12997 }
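
/* Worked examples for the matcher above (illustrative only): clamping to
   [0, 255] has LO_BOUND == 0 and HI_BOUND == 255, so log == 8, *MASK == 8 and
   the saturation is unsigned (usat #8).  Clamping to [-256, 255] satisfies
   -256 == -255 - 1, so *MASK == 9 and the saturation is signed (ssat #9,
   i.e. the range -2^8 .. 2^8 - 1).  */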
12998
12999 /* Return 1 if memory locations are adjacent. */
13000 int
13001 adjacent_mem_locations (rtx a, rtx b)
13002 {
13003 /* We don't guarantee to preserve the order of these memory refs. */
13004 if (volatile_refs_p (a) || volatile_refs_p (b))
13005 return 0;
13006
13007 if ((REG_P (XEXP (a, 0))
13008 || (GET_CODE (XEXP (a, 0)) == PLUS
13009 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13010 && (REG_P (XEXP (b, 0))
13011 || (GET_CODE (XEXP (b, 0)) == PLUS
13012 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13013 {
13014 HOST_WIDE_INT val0 = 0, val1 = 0;
13015 rtx reg0, reg1;
13016 int val_diff;
13017
13018 if (GET_CODE (XEXP (a, 0)) == PLUS)
13019 {
13020 reg0 = XEXP (XEXP (a, 0), 0);
13021 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13022 }
13023 else
13024 reg0 = XEXP (a, 0);
13025
13026 if (GET_CODE (XEXP (b, 0)) == PLUS)
13027 {
13028 reg1 = XEXP (XEXP (b, 0), 0);
13029 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13030 }
13031 else
13032 reg1 = XEXP (b, 0);
13033
13034 /* Don't accept any offset that will require multiple
13035 instructions to handle, since this would cause the
13036 arith_adjacentmem pattern to output an overlong sequence. */
13037 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13038 return 0;
13039
13040 /* Don't allow an eliminable register: register elimination can make
13041 the offset too large. */
13042 if (arm_eliminable_register (reg0))
13043 return 0;
13044
13045 val_diff = val1 - val0;
13046
13047 if (arm_ld_sched)
13048 {
13049 /* If the target has load delay slots, then there's no benefit
13050 to using an ldm instruction unless the offset is zero and
13051 we are optimizing for size. */
13052 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13053 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13054 && (val_diff == 4 || val_diff == -4));
13055 }
13056
13057 return ((REGNO (reg0) == REGNO (reg1))
13058 && (val_diff == 4 || val_diff == -4));
13059 }
13060
13061 return 0;
13062 }
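
/* For example (illustrative only), references equivalent to [r1, #4] and
   [r1, #8] count as adjacent here, whereas [r1, #4] and [r2, #8], or any
   volatile pair, do not; on arm_ld_sched targets the test is further
   restricted as the code above explains.  */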
13063
13064 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13065 for load operations, false for store operations. CONSECUTIVE is true
13066 if the register numbers in the operation must be consecutive in the register
13067 bank.  RETURN_PC is true if the value is to be loaded into the PC.
13068 The pattern we are trying to match for load is:
13069 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13070 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13071 :
13072 :
13073 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13074 ]
13075 where
13076 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13077 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13078 3. If consecutive is TRUE, then for kth register being loaded,
13079 REGNO (R_dk) = REGNO (R_d0) + k.
13080 The pattern for store is similar. */
13081 bool
13082 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13083 bool consecutive, bool return_pc)
13084 {
13085 HOST_WIDE_INT count = XVECLEN (op, 0);
13086 rtx reg, mem, addr;
13087 unsigned regno;
13088 unsigned first_regno;
13089 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13090 rtx elt;
13091 bool addr_reg_in_reglist = false;
13092 bool update = false;
13093 int reg_increment;
13094 int offset_adj;
13095 int regs_per_val;
13096
13097 /* If not in SImode, then registers must be consecutive
13098 (e.g., VLDM instructions for DFmode). */
13099 gcc_assert ((mode == SImode) || consecutive);
13100 /* Setting return_pc for stores is illegal. */
13101 gcc_assert (!return_pc || load);
13102
13103 /* Set up the increments and the regs per val based on the mode. */
13104 reg_increment = GET_MODE_SIZE (mode);
13105 regs_per_val = reg_increment / 4;
13106 offset_adj = return_pc ? 1 : 0;
13107
13108 if (count <= 1
13109 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13110 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13111 return false;
13112
13113 /* Check if this is a write-back. */
13114 elt = XVECEXP (op, 0, offset_adj);
13115 if (GET_CODE (SET_SRC (elt)) == PLUS)
13116 {
13117 i++;
13118 base = 1;
13119 update = true;
13120
13121 /* The offset adjustment must be the number of registers being
13122 popped times the size of a single register. */
13123 if (!REG_P (SET_DEST (elt))
13124 || !REG_P (XEXP (SET_SRC (elt), 0))
13125 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13126 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13127 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13128 ((count - 1 - offset_adj) * reg_increment))
13129 return false;
13130 }
13131
13132 i = i + offset_adj;
13133 base = base + offset_adj;
13134 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13135 success depends on the type: VLDM can do just one reg,
13136 LDM must do at least two. */
13137 if ((count <= i) && (mode == SImode))
13138 return false;
13139
13140 elt = XVECEXP (op, 0, i - 1);
13141 if (GET_CODE (elt) != SET)
13142 return false;
13143
13144 if (load)
13145 {
13146 reg = SET_DEST (elt);
13147 mem = SET_SRC (elt);
13148 }
13149 else
13150 {
13151 reg = SET_SRC (elt);
13152 mem = SET_DEST (elt);
13153 }
13154
13155 if (!REG_P (reg) || !MEM_P (mem))
13156 return false;
13157
13158 regno = REGNO (reg);
13159 first_regno = regno;
13160 addr = XEXP (mem, 0);
13161 if (GET_CODE (addr) == PLUS)
13162 {
13163 if (!CONST_INT_P (XEXP (addr, 1)))
13164 return false;
13165
13166 offset = INTVAL (XEXP (addr, 1));
13167 addr = XEXP (addr, 0);
13168 }
13169
13170 if (!REG_P (addr))
13171 return false;
13172
13173 /* Don't allow SP to be loaded unless it is also the base register. It
13174 guarantees that SP is reset correctly when an LDM instruction
13175 is interrupted. Otherwise, we might end up with a corrupt stack. */
13176 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13177 return false;
13178
13179 if (regno == REGNO (addr))
13180 addr_reg_in_reglist = true;
13181
13182 for (; i < count; i++)
13183 {
13184 elt = XVECEXP (op, 0, i);
13185 if (GET_CODE (elt) != SET)
13186 return false;
13187
13188 if (load)
13189 {
13190 reg = SET_DEST (elt);
13191 mem = SET_SRC (elt);
13192 }
13193 else
13194 {
13195 reg = SET_SRC (elt);
13196 mem = SET_DEST (elt);
13197 }
13198
13199 if (!REG_P (reg)
13200 || GET_MODE (reg) != mode
13201 || REGNO (reg) <= regno
13202 || (consecutive
13203 && (REGNO (reg) !=
13204 (unsigned int) (first_regno + regs_per_val * (i - base))))
13205 /* Don't allow SP to be loaded unless it is also the base register. It
13206 guarantees that SP is reset correctly when an LDM instruction
13207 is interrupted. Otherwise, we might end up with a corrupt stack. */
13208 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13209 || !MEM_P (mem)
13210 || GET_MODE (mem) != mode
13211 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13212 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13213 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13214 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13215 offset + (i - base) * reg_increment))
13216 && (!REG_P (XEXP (mem, 0))
13217 || offset + (i - base) * reg_increment != 0)))
13218 return false;
13219
13220 regno = REGNO (reg);
13221 if (regno == REGNO (addr))
13222 addr_reg_in_reglist = true;
13223 }
13224
13225 if (load)
13226 {
13227 if (update && addr_reg_in_reglist)
13228 return false;
13229
13230 /* For Thumb-1, the address register is always modified, either by write-back
13231 or by an explicit load.  If the pattern does not describe an update,
13232 then the address register must be in the list of loaded registers.  */
13233 if (TARGET_THUMB1)
13234 return update || addr_reg_in_reglist;
13235 }
13236
13237 return true;
13238 }
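
/* For illustration (schematic RTL, register numbers arbitrary), a
   "pop {r4, r5}" style load-multiple with write-back that this predicate
   accepts has the shape:

     (parallel
       [(set (reg sp) (plus (reg sp) (const_int 8)))
        (set (reg r4) (mem (reg sp)))
        (set (reg r5) (mem (plus (reg sp) (const_int 4))))])

   i.e. an optional write-back SET of the base register, whose adjustment is
   the number of registers transferred times the register size, followed by
   loads at offsets increasing by that size and with ascending register
   numbers.  */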
13239
13240 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13241 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13242 instruction. ADD_OFFSET is nonzero if the base address register needs
13243 to be modified with an add instruction before we can use it. */
13244
13245 static bool
13246 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13247 int nops, HOST_WIDE_INT add_offset)
13248 {
13249 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13250 if the offset isn't small enough. The reason 2 ldrs are faster
13251 is that these ARMs are able to do more than one cache access
13252 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13253 whilst the ARM8 has a double bandwidth cache. This means that
13254 these cores can do both an instruction fetch and a data fetch in
13255 a single cycle, so the trick of calculating the address into a
13256 scratch register (one of the result regs) and then doing a load
13257 multiple actually becomes slower (and no smaller in code size).
13258 That is the transformation
13259
13260 ldr rd1, [rbase + offset]
13261 ldr rd2, [rbase + offset + 4]
13262
13263 to
13264
13265 add rd1, rbase, offset
13266 ldmia rd1, {rd1, rd2}
13267
13268 produces worse code -- '3 cycles + any stalls on rd2' instead of
13269 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13270 access per cycle, the first sequence could never complete in less
13271 than 6 cycles, whereas the ldm sequence would only take 5 and
13272 would make better use of sequential accesses if not hitting the
13273 cache.
13274
13275 We cheat here and test 'arm_ld_sched' which we currently know to
13276 only be true for the ARM8, ARM9 and StrongARM. If this ever
13277 changes, then the test below needs to be reworked. */
13278 if (nops == 2 && arm_ld_sched && add_offset != 0)
13279 return false;
13280
13281 /* XScale has load-store double instructions, but they have stricter
13282 alignment requirements than load-store multiple, so we cannot
13283 use them.
13284
13285 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13286 the pipeline until completion.
13287
13288 NREGS CYCLES
13289 1 3
13290 2 4
13291 3 5
13292 4 6
13293
13294 An ldr instruction takes 1-3 cycles, but does not block the
13295 pipeline.
13296
13297 NREGS CYCLES
13298 1 1-3
13299 2 2-6
13300 3 3-9
13301 4 4-12
13302
13303 In the best case ldr will always win.  However, the more ldr instructions
13304 we issue, the less likely we are to be able to schedule them well.
13305 Using ldr instructions also increases code size.
13306
13307 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13308 for counts of 3 or 4 regs. */
13309 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13310 return false;
13311 return true;
13312 }
13313
13314 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13315 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13316 an array ORDER that describes the sequence in which to access the
13317 offsets so that they are visited in ascending order.  In this sequence,
13318 each offset must be larger by exactly 4 than the previous one.  ORDER[0]
13319 must have been filled in by the caller with the index of the lowest offset.
13320 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13321 we use to verify that ORDER produces an ascending order of registers.
13322 Return true if it was possible to construct such an order, false if
13323 not. */
13324
13325 static bool
13326 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13327 int *unsorted_regs)
13328 {
13329 int i;
13330 for (i = 1; i < nops; i++)
13331 {
13332 int j;
13333
13334 order[i] = order[i - 1];
13335 for (j = 0; j < nops; j++)
13336 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13337 {
13338 /* We must find exactly one offset that is higher than the
13339 previous one by 4. */
13340 if (order[i] != order[i - 1])
13341 return false;
13342 order[i] = j;
13343 }
13344 if (order[i] == order[i - 1])
13345 return false;
13346 /* The register numbers must be ascending. */
13347 if (unsorted_regs != NULL
13348 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13349 return false;
13350 }
13351 return true;
13352 }
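
/* Small worked example (illustrative only): for UNSORTED_OFFSETS {8, 4, 12, 0}
   the caller seeds ORDER[0] = 3 (the index of offset 0) and the loop above
   fills in ORDER = {3, 1, 0, 2}, visiting the offsets as 0, 4, 8, 12.
   Offsets {0, 4, 12} would fail, because no element is exactly 4 greater
   than 4.  */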
13353
13354 /* Used to determine in a peephole whether a sequence of load
13355 instructions can be changed into a load-multiple instruction.
13356 NOPS is the number of separate load instructions we are examining. The
13357 first NOPS entries in OPERANDS are the destination registers, the
13358 next NOPS entries are memory operands. If this function is
13359 successful, *BASE is set to the common base register of the memory
13360 accesses; *LOAD_OFFSET is set to the first memory location's offset
13361 from that base register.
13362 REGS is an array filled in with the destination register numbers.
13363 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13364 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13365 the sequence of registers in REGS matches the loads from ascending memory
13366 locations, and the function verifies that the register numbers are
13367 themselves ascending. If CHECK_REGS is false, the register numbers
13368 are stored in the order they are found in the operands. */
13369 static int
13370 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13371 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13372 {
13373 int unsorted_regs[MAX_LDM_STM_OPS];
13374 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13375 int order[MAX_LDM_STM_OPS];
13376 int base_reg = -1;
13377 int i, ldm_case;
13378
13379 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13380 easily extended if required. */
13381 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13382
13383 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13384
13385 /* Loop over the operands and check that the memory references are
13386 suitable (i.e. immediate offsets from the same base register). At
13387 the same time, extract the target register, and the memory
13388 offsets. */
13389 for (i = 0; i < nops; i++)
13390 {
13391 rtx reg;
13392 rtx offset;
13393
13394 /* Convert a subreg of a mem into the mem itself. */
13395 if (GET_CODE (operands[nops + i]) == SUBREG)
13396 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13397
13398 gcc_assert (MEM_P (operands[nops + i]));
13399
13400 /* Don't reorder volatile memory references; it doesn't seem worth
13401 looking for the case where the order is ok anyway. */
13402 if (MEM_VOLATILE_P (operands[nops + i]))
13403 return 0;
13404
13405 offset = const0_rtx;
13406
13407 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13408 || (GET_CODE (reg) == SUBREG
13409 && REG_P (reg = SUBREG_REG (reg))))
13410 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13411 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13412 || (GET_CODE (reg) == SUBREG
13413 && REG_P (reg = SUBREG_REG (reg))))
13414 && (CONST_INT_P (offset
13415 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13416 {
13417 if (i == 0)
13418 {
13419 base_reg = REGNO (reg);
13420 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13421 return 0;
13422 }
13423 else if (base_reg != (int) REGNO (reg))
13424 /* Not addressed from the same base register. */
13425 return 0;
13426
13427 unsorted_regs[i] = (REG_P (operands[i])
13428 ? REGNO (operands[i])
13429 : REGNO (SUBREG_REG (operands[i])));
13430
13431 /* If it isn't an integer register, or if it overwrites the
13432 base register but isn't the last insn in the list, then
13433 we can't do this. */
13434 if (unsorted_regs[i] < 0
13435 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13436 || unsorted_regs[i] > 14
13437 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13438 return 0;
13439
13440 /* Don't allow SP to be loaded unless it is also the base
13441 register. It guarantees that SP is reset correctly when
13442 an LDM instruction is interrupted. Otherwise, we might
13443 end up with a corrupt stack. */
13444 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13445 return 0;
13446
13447 unsorted_offsets[i] = INTVAL (offset);
13448 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13449 order[0] = i;
13450 }
13451 else
13452 /* Not a suitable memory address. */
13453 return 0;
13454 }
13455
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops, unsorted_offsets, order,
13462 check_regs ? unsorted_regs : NULL))
13463 return 0;
13464
13465 if (saved_order)
13466 memcpy (saved_order, order, sizeof order);
13467
13468 if (base)
13469 {
13470 *base = base_reg;
13471
13472 for (i = 0; i < nops; i++)
13473 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13474
13475 *load_offset = unsorted_offsets[order[0]];
13476 }
13477
13478 if (unsorted_offsets[order[0]] == 0)
13479 ldm_case = 1; /* ldmia */
13480 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13481 ldm_case = 2; /* ldmib */
13482 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13483 ldm_case = 3; /* ldmda */
13484 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13485 ldm_case = 4; /* ldmdb */
13486 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13487 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13488 ldm_case = 5;
13489 else
13490 return 0;
13491
13492 if (!multiple_operation_profitable_p (false, nops,
13493 ldm_case == 5
13494 ? unsorted_offsets[order[0]] : 0))
13495 return 0;
13496
13497 return ldm_case;
13498 }
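
/* Illustrative mapping from the sorted offsets to the value returned above
   (a sketch, assuming a common base register rn):

     lowest offset 0           -> case 1, ldmia rn, {...}
     lowest offset 4           -> case 2, ldmib rn, {...}   (ARM only)
     highest offset 0          -> case 3, ldmda rn, {...}   (ARM only)
     highest offset -4         -> case 4, ldmdb rn, {...}
     other reachable offsets   -> case 5, add into a scratch base first,
                                  then ldmia.  */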
13499
13500 /* Used to determine in a peephole whether a sequence of store instructions can
13501 be changed into a store-multiple instruction.
13502 NOPS is the number of separate store instructions we are examining.
13503 NOPS_TOTAL is the total number of instructions recognized by the peephole
13504 pattern.
13505 The first NOPS entries in OPERANDS are the source registers, the next
13506 NOPS entries are memory operands. If this function is successful, *BASE is
13507 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13508 to the first memory location's offset from that base register. REGS is an
13509 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13510 likewise filled with the corresponding rtx's.
13511 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13512 numbers to an ascending order of stores.
13513 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13514 from ascending memory locations, and the function verifies that the register
13515 numbers are themselves ascending. If CHECK_REGS is false, the register
13516 numbers are stored in the order they are found in the operands. */
13517 static int
13518 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13519 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13520 HOST_WIDE_INT *load_offset, bool check_regs)
13521 {
13522 int unsorted_regs[MAX_LDM_STM_OPS];
13523 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13524 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13525 int order[MAX_LDM_STM_OPS];
13526 int base_reg = -1;
13527 rtx base_reg_rtx = NULL;
13528 int i, stm_case;
13529
13530 /* Write-back of the base register is currently only supported for Thumb 1.  */
13531 int base_writeback = TARGET_THUMB1;
13532
13533 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13534 easily extended if required. */
13535 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13536
13537 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13538
13539 /* Loop over the operands and check that the memory references are
13540 suitable (i.e. immediate offsets from the same base register). At
13541 the same time, extract the target register, and the memory
13542 offsets. */
13543 for (i = 0; i < nops; i++)
13544 {
13545 rtx reg;
13546 rtx offset;
13547
13548 /* Convert a subreg of a mem into the mem itself. */
13549 if (GET_CODE (operands[nops + i]) == SUBREG)
13550 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13551
13552 gcc_assert (MEM_P (operands[nops + i]));
13553
13554 /* Don't reorder volatile memory references; it doesn't seem worth
13555 looking for the case where the order is ok anyway. */
13556 if (MEM_VOLATILE_P (operands[nops + i]))
13557 return 0;
13558
13559 offset = const0_rtx;
13560
13561 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13562 || (GET_CODE (reg) == SUBREG
13563 && REG_P (reg = SUBREG_REG (reg))))
13564 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13565 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13566 || (GET_CODE (reg) == SUBREG
13567 && REG_P (reg = SUBREG_REG (reg))))
13568 && (CONST_INT_P (offset
13569 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13570 {
13571 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13572 ? operands[i] : SUBREG_REG (operands[i]));
13573 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13574
13575 if (i == 0)
13576 {
13577 base_reg = REGNO (reg);
13578 base_reg_rtx = reg;
13579 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13580 return 0;
13581 }
13582 else if (base_reg != (int) REGNO (reg))
13583 /* Not addressed from the same base register. */
13584 return 0;
13585
13586 /* If it isn't an integer register, then we can't do this. */
13587 if (unsorted_regs[i] < 0
13588 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13589 /* The effects are unpredictable if the base register is
13590 both updated and stored. */
13591 || (base_writeback && unsorted_regs[i] == base_reg)
13592 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13593 || unsorted_regs[i] > 14)
13594 return 0;
13595
13596 unsorted_offsets[i] = INTVAL (offset);
13597 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13598 order[0] = i;
13599 }
13600 else
13601 /* Not a suitable memory address. */
13602 return 0;
13603 }
13604
13605 /* All the useful information has now been extracted from the
13606 operands into unsorted_regs and unsorted_offsets; additionally,
13607 order[0] has been set to the lowest offset in the list. Sort
13608 the offsets into order, verifying that they are adjacent, and
13609 check that the register numbers are ascending. */
13610 if (!compute_offset_order (nops, unsorted_offsets, order,
13611 check_regs ? unsorted_regs : NULL))
13612 return 0;
13613
13614 if (saved_order)
13615 memcpy (saved_order, order, sizeof order);
13616
13617 if (base)
13618 {
13619 *base = base_reg;
13620
13621 for (i = 0; i < nops; i++)
13622 {
13623 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13624 if (reg_rtxs)
13625 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13626 }
13627
13628 *load_offset = unsorted_offsets[order[0]];
13629 }
13630
13631 if (TARGET_THUMB1
13632 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13633 return 0;
13634
13635 if (unsorted_offsets[order[0]] == 0)
13636 stm_case = 1; /* stmia */
13637 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13638 stm_case = 2; /* stmib */
13639 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13640 stm_case = 3; /* stmda */
13641 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13642 stm_case = 4; /* stmdb */
13643 else
13644 return 0;
13645
13646 if (!multiple_operation_profitable_p (false, nops, 0))
13647 return 0;
13648
13649 return stm_case;
13650 }
13651 \f
13652 /* Routines for use in generating RTL. */
13653
13654 /* Generate a load-multiple instruction. COUNT is the number of loads in
13655 the instruction; REGS and MEMS are arrays containing the operands.
13656 BASEREG is the base register to be used in addressing the memory operands.
13657 WBACK_OFFSET is nonzero if the instruction should update the base
13658 register. */
13659
13660 static rtx
13661 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13662 HOST_WIDE_INT wback_offset)
13663 {
13664 int i = 0, j;
13665 rtx result;
13666
13667 if (!multiple_operation_profitable_p (false, count, 0))
13668 {
13669 rtx seq;
13670
13671 start_sequence ();
13672
13673 for (i = 0; i < count; i++)
13674 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13675
13676 if (wback_offset != 0)
13677 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13678
13679 seq = get_insns ();
13680 end_sequence ();
13681
13682 return seq;
13683 }
13684
13685 result = gen_rtx_PARALLEL (VOIDmode,
13686 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13687 if (wback_offset != 0)
13688 {
13689 XVECEXP (result, 0, 0)
13690 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13691 i = 1;
13692 count++;
13693 }
13694
13695 for (j = 0; i < count; i++, j++)
13696 XVECEXP (result, 0, i)
13697 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13698
13699 return result;
13700 }
13701
13702 /* Generate a store-multiple instruction. COUNT is the number of stores in
13703 the instruction; REGS and MEMS are arrays containing the operands.
13704 BASEREG is the base register to be used in addressing the memory operands.
13705 WBACK_OFFSET is nonzero if the instruction should update the base
13706 register. */
13707
13708 static rtx
13709 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13710 HOST_WIDE_INT wback_offset)
13711 {
13712 int i = 0, j;
13713 rtx result;
13714
13715 if (GET_CODE (basereg) == PLUS)
13716 basereg = XEXP (basereg, 0);
13717
13718 if (!multiple_operation_profitable_p (false, count, 0))
13719 {
13720 rtx seq;
13721
13722 start_sequence ();
13723
13724 for (i = 0; i < count; i++)
13725 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13726
13727 if (wback_offset != 0)
13728 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13729
13730 seq = get_insns ();
13731 end_sequence ();
13732
13733 return seq;
13734 }
13735
13736 result = gen_rtx_PARALLEL (VOIDmode,
13737 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13738 if (wback_offset != 0)
13739 {
13740 XVECEXP (result, 0, 0)
13741 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13742 i = 1;
13743 count++;
13744 }
13745
13746 for (j = 0; i < count; i++, j++)
13747 XVECEXP (result, 0, i)
13748 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13749
13750 return result;
13751 }
13752
13753 /* Generate either a load-multiple or a store-multiple instruction. This
13754 function can be used in situations where we can start with a single MEM
13755 rtx and adjust its address upwards.
13756 COUNT is the number of operations in the instruction, not counting a
13757 possible update of the base register. REGS is an array containing the
13758 register operands.
13759 BASEREG is the base register to be used in addressing the memory operands,
13760 which are constructed from BASEMEM.
13761 WRITE_BACK specifies whether the generated instruction should include an
13762 update of the base register.
13763 OFFSETP is used to pass an offset to and from this function; this offset
13764 is not used when constructing the address (instead BASEMEM should have an
13765 appropriate offset in its address), it is used only for setting
13766 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13767
13768 static rtx
13769 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13770 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13771 {
13772 rtx mems[MAX_LDM_STM_OPS];
13773 HOST_WIDE_INT offset = *offsetp;
13774 int i;
13775
13776 gcc_assert (count <= MAX_LDM_STM_OPS);
13777
13778 if (GET_CODE (basereg) == PLUS)
13779 basereg = XEXP (basereg, 0);
13780
13781 for (i = 0; i < count; i++)
13782 {
13783 rtx addr = plus_constant (Pmode, basereg, i * 4);
13784 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13785 offset += 4;
13786 }
13787
13788 if (write_back)
13789 *offsetp = offset;
13790
13791 if (is_load)
13792 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13793 write_back ? 4 * count : 0);
13794 else
13795 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13796 write_back ? 4 * count : 0);
13797 }
13798
13799 rtx
13800 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13801 rtx basemem, HOST_WIDE_INT *offsetp)
13802 {
13803 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13804 offsetp);
13805 }
13806
13807 rtx
13808 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13809 rtx basemem, HOST_WIDE_INT *offsetp)
13810 {
13811 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13812 offsetp);
13813 }
13814
13815 /* Called from a peephole2 expander to turn a sequence of loads into an
13816 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13817 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13818 is true if we can reorder the registers because they are used commutatively
13819 subsequently.
13820 Returns true iff we could generate a new instruction. */
13821
13822 bool
13823 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13824 {
13825 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13826 rtx mems[MAX_LDM_STM_OPS];
13827 int i, j, base_reg;
13828 rtx base_reg_rtx;
13829 HOST_WIDE_INT offset;
13830 int write_back = FALSE;
13831 int ldm_case;
13832 rtx addr;
13833
13834 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13835 &base_reg, &offset, !sort_regs);
13836
13837 if (ldm_case == 0)
13838 return false;
13839
13840 if (sort_regs)
13841 for (i = 0; i < nops - 1; i++)
13842 for (j = i + 1; j < nops; j++)
13843 if (regs[i] > regs[j])
13844 {
13845 int t = regs[i];
13846 regs[i] = regs[j];
13847 regs[j] = t;
13848 }
13849 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13850
13851 if (TARGET_THUMB1)
13852 {
13853 gcc_assert (ldm_case == 1 || ldm_case == 5);
13854
13855 /* Thumb-1 ldm uses writeback except if the base is loaded. */
13856 write_back = true;
13857 for (i = 0; i < nops; i++)
13858 if (base_reg == regs[i])
13859 write_back = false;
13860
13861 /* Ensure the base is dead if it is updated. */
13862 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
13863 return false;
13864 }
13865
13866 if (ldm_case == 5)
13867 {
13868 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13869 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13870 offset = 0;
13871 base_reg_rtx = newbase;
13872 }
13873
13874 for (i = 0; i < nops; i++)
13875 {
13876 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13877 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13878 SImode, addr, 0);
13879 }
13880 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13881 write_back ? offset + i * 4 : 0));
13882 return true;
13883 }
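
/* Rough example of the transformation this peephole helper performs
   (register and offset choices arbitrary):

     ldr r0, [r3]
     ldr r1, [r3, #4]

   becomes

     ldmia r3, {r0, r1}

   provided load_multiple_sequence accepts the operands and the register
   ordering constraints are met.  */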
13884
13885 /* Called from a peephole2 expander to turn a sequence of stores into an
13886 STM instruction. OPERANDS are the operands found by the peephole matcher;
13887 NOPS indicates how many separate stores we are trying to combine.
13888 Returns true iff we could generate a new instruction. */
13889
13890 bool
13891 gen_stm_seq (rtx *operands, int nops)
13892 {
13893 int i;
13894 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13895 rtx mems[MAX_LDM_STM_OPS];
13896 int base_reg;
13897 rtx base_reg_rtx;
13898 HOST_WIDE_INT offset;
13899 int write_back = FALSE;
13900 int stm_case;
13901 rtx addr;
13902 bool base_reg_dies;
13903
13904 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13905 mem_order, &base_reg, &offset, true);
13906
13907 if (stm_case == 0)
13908 return false;
13909
13910 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13911
13912 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13913 if (TARGET_THUMB1)
13914 {
13915 gcc_assert (base_reg_dies);
13916 write_back = TRUE;
13917 }
13918
13919 if (stm_case == 5)
13920 {
13921 gcc_assert (base_reg_dies);
13922 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13923 offset = 0;
13924 }
13925
13926 addr = plus_constant (Pmode, base_reg_rtx, offset);
13927
13928 for (i = 0; i < nops; i++)
13929 {
13930 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13931 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13932 SImode, addr, 0);
13933 }
13934 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13935 write_back ? offset + i * 4 : 0));
13936 return true;
13937 }
13938
13939 /* Called from a peephole2 expander to turn a sequence of stores that are
13940 preceded by constant loads into an STM instruction. OPERANDS are the
13941 operands found by the peephole matcher; NOPS indicates how many
13942 separate stores we are trying to combine; there are 2 * NOPS
13943 instructions in the peephole.
13944 Returns true iff we could generate a new instruction. */
13945
13946 bool
13947 gen_const_stm_seq (rtx *operands, int nops)
13948 {
13949 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13950 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13951 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13952 rtx mems[MAX_LDM_STM_OPS];
13953 int base_reg;
13954 rtx base_reg_rtx;
13955 HOST_WIDE_INT offset;
13956 int write_back = FALSE;
13957 int stm_case;
13958 rtx addr;
13959 bool base_reg_dies;
13960 int i, j;
13961 HARD_REG_SET allocated;
13962
13963 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13964 mem_order, &base_reg, &offset, false);
13965
13966 if (stm_case == 0)
13967 return false;
13968
13969 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13970
13971 /* If the same register is used more than once, try to find a free
13972 register. */
13973 CLEAR_HARD_REG_SET (allocated);
13974 for (i = 0; i < nops; i++)
13975 {
13976 for (j = i + 1; j < nops; j++)
13977 if (regs[i] == regs[j])
13978 {
13979 rtx t = peep2_find_free_register (0, nops * 2,
13980 TARGET_THUMB1 ? "l" : "r",
13981 SImode, &allocated);
13982 if (t == NULL_RTX)
13983 return false;
13984 reg_rtxs[i] = t;
13985 regs[i] = REGNO (t);
13986 }
13987 }
13988
13989 /* Compute an ordering that maps the register numbers to an ascending
13990 sequence. */
13991 reg_order[0] = 0;
13992 for (i = 0; i < nops; i++)
13993 if (regs[i] < regs[reg_order[0]])
13994 reg_order[0] = i;
13995
13996 for (i = 1; i < nops; i++)
13997 {
13998 int this_order = reg_order[i - 1];
13999 for (j = 0; j < nops; j++)
14000 if (regs[j] > regs[reg_order[i - 1]]
14001 && (this_order == reg_order[i - 1]
14002 || regs[j] < regs[this_order]))
14003 this_order = j;
14004 reg_order[i] = this_order;
14005 }
14006
14007 /* Ensure that registers that must be live after the instruction end
14008 up with the correct value. */
14009 for (i = 0; i < nops; i++)
14010 {
14011 int this_order = reg_order[i];
14012 if ((this_order != mem_order[i]
14013 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14014 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14015 return false;
14016 }
14017
14018 /* Load the constants. */
14019 for (i = 0; i < nops; i++)
14020 {
14021 rtx op = operands[2 * nops + mem_order[i]];
14022 sorted_regs[i] = regs[reg_order[i]];
14023 emit_move_insn (reg_rtxs[reg_order[i]], op);
14024 }
14025
14026 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14027
14028 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14029 if (TARGET_THUMB1)
14030 {
14031 gcc_assert (base_reg_dies);
14032 write_back = TRUE;
14033 }
14034
14035 if (stm_case == 5)
14036 {
14037 gcc_assert (base_reg_dies);
14038 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14039 offset = 0;
14040 }
14041
14042 addr = plus_constant (Pmode, base_reg_rtx, offset);
14043
14044 for (i = 0; i < nops; i++)
14045 {
14046 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14047 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14048 SImode, addr, 0);
14049 }
14050 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14051 write_back ? offset + i * 4 : 0));
14052 return true;
14053 }
14054
14055 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14056 unaligned copies on processors which support unaligned semantics for those
14057 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14058 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14059 An interleave factor of 1 (the minimum) will perform no interleaving.
14060 Load/store multiple are used for aligned addresses where possible. */
14061
14062 static void
14063 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14064 HOST_WIDE_INT length,
14065 unsigned int interleave_factor)
14066 {
14067 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14068 int *regnos = XALLOCAVEC (int, interleave_factor);
14069 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14070 HOST_WIDE_INT i, j;
14071 HOST_WIDE_INT remaining = length, words;
14072 rtx halfword_tmp = NULL, byte_tmp = NULL;
14073 rtx dst, src;
14074 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14075 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14076 HOST_WIDE_INT srcoffset, dstoffset;
14077 HOST_WIDE_INT src_autoinc, dst_autoinc;
14078 rtx mem, addr;
14079
14080 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14081
14082 /* Use hard registers if we have aligned source or destination so we can use
14083 load/store multiple with contiguous registers. */
14084 if (dst_aligned || src_aligned)
14085 for (i = 0; i < interleave_factor; i++)
14086 regs[i] = gen_rtx_REG (SImode, i);
14087 else
14088 for (i = 0; i < interleave_factor; i++)
14089 regs[i] = gen_reg_rtx (SImode);
14090
14091 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14092 src = copy_addr_to_reg (XEXP (srcbase, 0));
14093
14094 srcoffset = dstoffset = 0;
14095
14096 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14097 For copying the last bytes we want to subtract this offset again. */
14098 src_autoinc = dst_autoinc = 0;
14099
14100 for (i = 0; i < interleave_factor; i++)
14101 regnos[i] = i;
14102
14103 /* Copy BLOCK_SIZE_BYTES chunks. */
14104
14105 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14106 {
14107 /* Load words. */
14108 if (src_aligned && interleave_factor > 1)
14109 {
14110 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14111 TRUE, srcbase, &srcoffset));
14112 src_autoinc += UNITS_PER_WORD * interleave_factor;
14113 }
14114 else
14115 {
14116 for (j = 0; j < interleave_factor; j++)
14117 {
14118 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14119 - src_autoinc));
14120 mem = adjust_automodify_address (srcbase, SImode, addr,
14121 srcoffset + j * UNITS_PER_WORD);
14122 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14123 }
14124 srcoffset += block_size_bytes;
14125 }
14126
14127 /* Store words. */
14128 if (dst_aligned && interleave_factor > 1)
14129 {
14130 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14131 TRUE, dstbase, &dstoffset));
14132 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14133 }
14134 else
14135 {
14136 for (j = 0; j < interleave_factor; j++)
14137 {
14138 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14139 - dst_autoinc));
14140 mem = adjust_automodify_address (dstbase, SImode, addr,
14141 dstoffset + j * UNITS_PER_WORD);
14142 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14143 }
14144 dstoffset += block_size_bytes;
14145 }
14146
14147 remaining -= block_size_bytes;
14148 }
14149
14150 /* Copy any whole words left (note these aren't interleaved with any
14151 subsequent halfword/byte load/stores in the interests of simplicity). */
14152
14153 words = remaining / UNITS_PER_WORD;
14154
14155 gcc_assert (words < interleave_factor);
14156
14157 if (src_aligned && words > 1)
14158 {
14159 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14160 &srcoffset));
14161 src_autoinc += UNITS_PER_WORD * words;
14162 }
14163 else
14164 {
14165 for (j = 0; j < words; j++)
14166 {
14167 addr = plus_constant (Pmode, src,
14168 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14169 mem = adjust_automodify_address (srcbase, SImode, addr,
14170 srcoffset + j * UNITS_PER_WORD);
14171 if (src_aligned)
14172 emit_move_insn (regs[j], mem);
14173 else
14174 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14175 }
14176 srcoffset += words * UNITS_PER_WORD;
14177 }
14178
14179 if (dst_aligned && words > 1)
14180 {
14181 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14182 &dstoffset));
14183 dst_autoinc += words * UNITS_PER_WORD;
14184 }
14185 else
14186 {
14187 for (j = 0; j < words; j++)
14188 {
14189 addr = plus_constant (Pmode, dst,
14190 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14191 mem = adjust_automodify_address (dstbase, SImode, addr,
14192 dstoffset + j * UNITS_PER_WORD);
14193 if (dst_aligned)
14194 emit_move_insn (mem, regs[j]);
14195 else
14196 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14197 }
14198 dstoffset += words * UNITS_PER_WORD;
14199 }
14200
14201 remaining -= words * UNITS_PER_WORD;
14202
14203 gcc_assert (remaining < 4);
14204
14205 /* Copy a halfword if necessary. */
14206
14207 if (remaining >= 2)
14208 {
14209 halfword_tmp = gen_reg_rtx (SImode);
14210
14211 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14212 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14213 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14214
14215 /* Either write out immediately, or delay until we've loaded the last
14216 byte, depending on interleave factor. */
14217 if (interleave_factor == 1)
14218 {
14219 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14220 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14221 emit_insn (gen_unaligned_storehi (mem,
14222 gen_lowpart (HImode, halfword_tmp)));
14223 halfword_tmp = NULL;
14224 dstoffset += 2;
14225 }
14226
14227 remaining -= 2;
14228 srcoffset += 2;
14229 }
14230
14231 gcc_assert (remaining < 2);
14232
14233 /* Copy last byte. */
14234
14235 if ((remaining & 1) != 0)
14236 {
14237 byte_tmp = gen_reg_rtx (SImode);
14238
14239 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14240 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14241 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14242
14243 if (interleave_factor == 1)
14244 {
14245 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14246 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14247 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14248 byte_tmp = NULL;
14249 dstoffset++;
14250 }
14251
14252 remaining--;
14253 srcoffset++;
14254 }
14255
14256 /* Store last halfword if we haven't done so already. */
14257
14258 if (halfword_tmp)
14259 {
14260 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14261 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14262 emit_insn (gen_unaligned_storehi (mem,
14263 gen_lowpart (HImode, halfword_tmp)));
14264 dstoffset += 2;
14265 }
14266
14267 /* Likewise for last byte. */
14268
14269 if (byte_tmp)
14270 {
14271 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14272 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14273 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14274 dstoffset++;
14275 }
14276
14277 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14278 }
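
/* Illustrative sketch only (not emitted literally by the expander above):
   for a 7-byte copy with INTERLEAVE_FACTOR == 1 and an unaligned source,
   the straight-line expansion has roughly this shape

	ldr	r0, [src]		@ unaligned word copy
	str	r0, [dst]
	ldrh	r1, [src, #4]		@ unaligned halfword copy
	strh	r1, [dst, #4]
	ldrb	r2, [src, #6]		@ final byte
	strb	r2, [dst, #6]

   with the actual register numbers chosen by the register allocator.  */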
14279
14280 /* From mips_adjust_block_mem:
14281
14282 Helper function for doing a loop-based block operation on memory
14283 reference MEM. Each iteration of the loop will operate on LENGTH
14284 bytes of MEM.
14285
14286 Create a new base register for use within the loop and point it to
14287 the start of MEM. Create a new memory reference that uses this
14288 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14289
14290 static void
14291 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14292 rtx *loop_mem)
14293 {
14294 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14295
14296 /* Although the new mem does not refer to a known location,
14297 it does keep up to LENGTH bytes of alignment. */
14298 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14299 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14300 }
14301
14302 /* From mips_block_move_loop:
14303
14304 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14305 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14306 the memory regions do not overlap. */
14307
14308 static void
14309 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14310 unsigned int interleave_factor,
14311 HOST_WIDE_INT bytes_per_iter)
14312 {
14313 rtx src_reg, dest_reg, final_src, test;
14314 HOST_WIDE_INT leftover;
14315
14316 leftover = length % bytes_per_iter;
14317 length -= leftover;
14318
14319 /* Create registers and memory references for use within the loop. */
14320 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14321 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14322
14323 /* Calculate the value that SRC_REG should have after the last iteration of
14324 the loop. */
14325 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14326 0, 0, OPTAB_WIDEN);
14327
14328 /* Emit the start of the loop. */
14329 rtx_code_label *label = gen_label_rtx ();
14330 emit_label (label);
14331
14332 /* Emit the loop body. */
14333 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14334 interleave_factor);
14335
14336 /* Move on to the next block. */
14337 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14338 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14339
14340 /* Emit the loop condition. */
14341 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14342 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14343
14344 /* Mop up any left-over bytes. */
14345 if (leftover)
14346 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14347 }
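
/* A rough sketch (for illustration only) of the code produced by the loop
   expander above for LENGTH == 40, INTERLEAVE_FACTOR == 4 and
   BYTES_PER_ITER == 16:

	add	final_src, src_reg, #32
   L:
	<straight copy of 16 bytes>
	add	src_reg, src_reg, #16
	add	dst_reg, dst_reg, #16
	cmp	src_reg, final_src
	bne	L
	<straight copy of the remaining 8 bytes>

   The exact instructions depend on alignment and on later optimization
   passes (the loop may be unrolled, for instance).  */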
14348
14349 /* Emit a block move when either the source or destination is unaligned (not
14350 aligned to a four-byte boundary). This may need further tuning depending on
14351 core type, optimize_size setting, etc. */
14352
14353 static int
14354 arm_cpymemqi_unaligned (rtx *operands)
14355 {
14356 HOST_WIDE_INT length = INTVAL (operands[2]);
14357
14358 if (optimize_size)
14359 {
14360 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14361 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14362 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14363 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14364 or dst_aligned though: allow more interleaving in those cases since the
14365 resulting code can be smaller. */
14366 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14367 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14368
14369 if (length > 12)
14370 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14371 interleave_factor, bytes_per_iter);
14372 else
14373 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14374 interleave_factor);
14375 }
14376 else
14377 {
14378 /* Note that the loop created by arm_block_move_unaligned_loop may be
14379 subject to loop unrolling, which makes tuning this condition a little
14380 redundant. */
14381 if (length > 32)
14382 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14383 else
14384 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14385 }
14386
14387 return 1;
14388 }
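
/* For example (illustration only): when optimizing for size, a 20-byte copy
   from a word-aligned source takes the loop form above with two registers
   per iteration (8 bytes of ldm/stm per trip) plus a 4-byte straight-line
   tail, whereas without -Os the same copy is short enough (<= 32 bytes) to
   be expanded entirely straight-line with an interleave factor of 4.  */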
14389
14390 int
14391 arm_gen_cpymemqi (rtx *operands)
14392 {
14393 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14394 HOST_WIDE_INT srcoffset, dstoffset;
14395 rtx src, dst, srcbase, dstbase;
14396 rtx part_bytes_reg = NULL;
14397 rtx mem;
14398
14399 if (!CONST_INT_P (operands[2])
14400 || !CONST_INT_P (operands[3])
14401 || INTVAL (operands[2]) > 64)
14402 return 0;
14403
14404 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14405 return arm_cpymemqi_unaligned (operands);
14406
14407 if (INTVAL (operands[3]) & 3)
14408 return 0;
14409
14410 dstbase = operands[0];
14411 srcbase = operands[1];
14412
14413 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14414 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14415
14416 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14417 out_words_to_go = INTVAL (operands[2]) / 4;
14418 last_bytes = INTVAL (operands[2]) & 3;
14419 dstoffset = srcoffset = 0;
14420
14421 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14422 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14423
14424 while (in_words_to_go >= 2)
14425 {
14426 if (in_words_to_go > 4)
14427 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14428 TRUE, srcbase, &srcoffset));
14429 else
14430 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14431 src, FALSE, srcbase,
14432 &srcoffset));
14433
14434 if (out_words_to_go)
14435 {
14436 if (out_words_to_go > 4)
14437 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14438 TRUE, dstbase, &dstoffset));
14439 else if (out_words_to_go != 1)
14440 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14441 out_words_to_go, dst,
14442 (last_bytes == 0
14443 ? FALSE : TRUE),
14444 dstbase, &dstoffset));
14445 else
14446 {
14447 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14448 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14449 if (last_bytes != 0)
14450 {
14451 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14452 dstoffset += 4;
14453 }
14454 }
14455 }
14456
14457 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14458 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14459 }
14460
14461 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14462 if (out_words_to_go)
14463 {
14464 rtx sreg;
14465
14466 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14467 sreg = copy_to_reg (mem);
14468
14469 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14470 emit_move_insn (mem, sreg);
14471 in_words_to_go--;
14472
14473 gcc_assert (!in_words_to_go); /* Sanity check */
14474 }
14475
14476 if (in_words_to_go)
14477 {
14478 gcc_assert (in_words_to_go > 0);
14479
14480 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14481 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14482 }
14483
14484 gcc_assert (!last_bytes || part_bytes_reg);
14485
14486 if (BYTES_BIG_ENDIAN && last_bytes)
14487 {
14488 rtx tmp = gen_reg_rtx (SImode);
14489
14490 /* The bytes we want are in the top end of the word. */
14491 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14492 GEN_INT (8 * (4 - last_bytes))));
14493 part_bytes_reg = tmp;
14494
14495 while (last_bytes)
14496 {
14497 mem = adjust_automodify_address (dstbase, QImode,
14498 plus_constant (Pmode, dst,
14499 last_bytes - 1),
14500 dstoffset + last_bytes - 1);
14501 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14502
14503 if (--last_bytes)
14504 {
14505 tmp = gen_reg_rtx (SImode);
14506 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14507 part_bytes_reg = tmp;
14508 }
14509 }
14510
14511 }
14512 else
14513 {
14514 if (last_bytes > 1)
14515 {
14516 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14517 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14518 last_bytes -= 2;
14519 if (last_bytes)
14520 {
14521 rtx tmp = gen_reg_rtx (SImode);
14522 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14523 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14524 part_bytes_reg = tmp;
14525 dstoffset += 2;
14526 }
14527 }
14528
14529 if (last_bytes)
14530 {
14531 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14532 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14533 }
14534 }
14535
14536 return 1;
14537 }
14538
14539 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14540 by mode size. */
14541 inline static rtx
14542 next_consecutive_mem (rtx mem)
14543 {
14544 machine_mode mode = GET_MODE (mem);
14545 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14546 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14547
14548 return adjust_automodify_address (mem, mode, addr, offset);
14549 }
14550
14551 /* Copy using LDRD/STRD instructions whenever possible.
14552 Returns true upon success. */
14553 bool
14554 gen_cpymem_ldrd_strd (rtx *operands)
14555 {
14556 unsigned HOST_WIDE_INT len;
14557 HOST_WIDE_INT align;
14558 rtx src, dst, base;
14559 rtx reg0;
14560 bool src_aligned, dst_aligned;
14561 bool src_volatile, dst_volatile;
14562
14563 gcc_assert (CONST_INT_P (operands[2]));
14564 gcc_assert (CONST_INT_P (operands[3]));
14565
14566 len = UINTVAL (operands[2]);
14567 if (len > 64)
14568 return false;
14569
14570 /* Maximum alignment we can assume for both src and dst buffers. */
14571 align = INTVAL (operands[3]);
14572
14573 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14574 return false;
14575
14576 /* Place src and dst addresses in registers
14577 and update the corresponding mem rtx. */
14578 dst = operands[0];
14579 dst_volatile = MEM_VOLATILE_P (dst);
14580 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14581 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14582 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14583
14584 src = operands[1];
14585 src_volatile = MEM_VOLATILE_P (src);
14586 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14587 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14588 src = adjust_automodify_address (src, VOIDmode, base, 0);
14589
14590 if (!unaligned_access && !(src_aligned && dst_aligned))
14591 return false;
14592
14593 if (src_volatile || dst_volatile)
14594 return false;
14595
14596 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14597 if (!(dst_aligned || src_aligned))
14598 return arm_gen_cpymemqi (operands);
14599
14600   /* If either src or dst is unaligned we'll be accessing it as pairs
14601 of unaligned SImode accesses. Otherwise we can generate DImode
14602 ldrd/strd instructions. */
14603 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14604 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14605
14606 while (len >= 8)
14607 {
14608 len -= 8;
14609 reg0 = gen_reg_rtx (DImode);
14610 rtx low_reg = NULL_RTX;
14611 rtx hi_reg = NULL_RTX;
14612
14613 if (!src_aligned || !dst_aligned)
14614 {
14615 low_reg = gen_lowpart (SImode, reg0);
14616 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14617 }
14618 if (src_aligned)
14619 emit_move_insn (reg0, src);
14620 else
14621 {
14622 emit_insn (gen_unaligned_loadsi (low_reg, src));
14623 src = next_consecutive_mem (src);
14624 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14625 }
14626
14627 if (dst_aligned)
14628 emit_move_insn (dst, reg0);
14629 else
14630 {
14631 emit_insn (gen_unaligned_storesi (dst, low_reg));
14632 dst = next_consecutive_mem (dst);
14633 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14634 }
14635
14636 src = next_consecutive_mem (src);
14637 dst = next_consecutive_mem (dst);
14638 }
14639
14640 gcc_assert (len < 8);
14641 if (len >= 4)
14642 {
14643 /* More than a word but less than a double-word to copy. Copy a word. */
14644 reg0 = gen_reg_rtx (SImode);
14645 src = adjust_address (src, SImode, 0);
14646 dst = adjust_address (dst, SImode, 0);
14647 if (src_aligned)
14648 emit_move_insn (reg0, src);
14649 else
14650 emit_insn (gen_unaligned_loadsi (reg0, src));
14651
14652 if (dst_aligned)
14653 emit_move_insn (dst, reg0);
14654 else
14655 emit_insn (gen_unaligned_storesi (dst, reg0));
14656
14657 src = next_consecutive_mem (src);
14658 dst = next_consecutive_mem (dst);
14659 len -= 4;
14660 }
14661
14662 if (len == 0)
14663 return true;
14664
14665 /* Copy the remaining bytes. */
14666 if (len >= 2)
14667 {
14668 dst = adjust_address (dst, HImode, 0);
14669 src = adjust_address (src, HImode, 0);
14670 reg0 = gen_reg_rtx (SImode);
14671 if (src_aligned)
14672 emit_insn (gen_zero_extendhisi2 (reg0, src));
14673 else
14674 emit_insn (gen_unaligned_loadhiu (reg0, src));
14675
14676 if (dst_aligned)
14677 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14678 else
14679 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14680
14681 src = next_consecutive_mem (src);
14682 dst = next_consecutive_mem (dst);
14683 if (len == 2)
14684 return true;
14685 }
14686
14687 dst = adjust_address (dst, QImode, 0);
14688 src = adjust_address (src, QImode, 0);
14689 reg0 = gen_reg_rtx (QImode);
14690 emit_move_insn (reg0, src);
14691 emit_move_insn (dst, reg0);
14692 return true;
14693 }
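
/* Illustrative sketch only: for a 14-byte copy with both buffers known to be
   word aligned, the expansion above corresponds roughly to

	ldrd	r0, r1, [src]
	strd	r0, r1, [dst]
	ldr	r2, [src, #8]
	str	r2, [dst, #8]
	ldrh	r3, [src, #12]
	strh	r3, [dst, #12]

   i.e. one DImode move, one SImode move and one HImode move.  Unaligned
   buffers use pairs of unaligned SImode accesses instead of LDRD/STRD.  */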
14694
14695 /* Select a dominance comparison mode if possible for a test of the general
14696 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14697 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14698 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14699 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14700 In all cases OP will be either EQ or NE, but we don't need to know which
14701 here. If we are unable to support a dominance comparison we return
14702 CC mode. This will then fail to match for the RTL expressions that
14703 generate this call. */
14704 machine_mode
14705 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14706 {
14707 enum rtx_code cond1, cond2;
14708 int swapped = 0;
14709
14710 /* Currently we will probably get the wrong result if the individual
14711 comparisons are not simple. This also ensures that it is safe to
14712 reverse a comparison if necessary. */
14713 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14714 != CCmode)
14715 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14716 != CCmode))
14717 return CCmode;
14718
14719 /* The if_then_else variant of this tests the second condition if the
14720 first passes, but is true if the first fails. Reverse the first
14721 condition to get a true "inclusive-or" expression. */
14722 if (cond_or == DOM_CC_NX_OR_Y)
14723 cond1 = reverse_condition (cond1);
14724
14725 /* If the comparisons are not equal, and one doesn't dominate the other,
14726 then we can't do this. */
14727 if (cond1 != cond2
14728 && !comparison_dominates_p (cond1, cond2)
14729 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14730 return CCmode;
14731
14732 if (swapped)
14733 std::swap (cond1, cond2);
14734
14735 switch (cond1)
14736 {
14737 case EQ:
14738 if (cond_or == DOM_CC_X_AND_Y)
14739 return CC_DEQmode;
14740
14741 switch (cond2)
14742 {
14743 case EQ: return CC_DEQmode;
14744 case LE: return CC_DLEmode;
14745 case LEU: return CC_DLEUmode;
14746 case GE: return CC_DGEmode;
14747 case GEU: return CC_DGEUmode;
14748 default: gcc_unreachable ();
14749 }
14750
14751 case LT:
14752 if (cond_or == DOM_CC_X_AND_Y)
14753 return CC_DLTmode;
14754
14755 switch (cond2)
14756 {
14757 case LT:
14758 return CC_DLTmode;
14759 case LE:
14760 return CC_DLEmode;
14761 case NE:
14762 return CC_DNEmode;
14763 default:
14764 gcc_unreachable ();
14765 }
14766
14767 case GT:
14768 if (cond_or == DOM_CC_X_AND_Y)
14769 return CC_DGTmode;
14770
14771 switch (cond2)
14772 {
14773 case GT:
14774 return CC_DGTmode;
14775 case GE:
14776 return CC_DGEmode;
14777 case NE:
14778 return CC_DNEmode;
14779 default:
14780 gcc_unreachable ();
14781 }
14782
14783 case LTU:
14784 if (cond_or == DOM_CC_X_AND_Y)
14785 return CC_DLTUmode;
14786
14787 switch (cond2)
14788 {
14789 case LTU:
14790 return CC_DLTUmode;
14791 case LEU:
14792 return CC_DLEUmode;
14793 case NE:
14794 return CC_DNEmode;
14795 default:
14796 gcc_unreachable ();
14797 }
14798
14799 case GTU:
14800 if (cond_or == DOM_CC_X_AND_Y)
14801 return CC_DGTUmode;
14802
14803 switch (cond2)
14804 {
14805 case GTU:
14806 return CC_DGTUmode;
14807 case GEU:
14808 return CC_DGEUmode;
14809 case NE:
14810 return CC_DNEmode;
14811 default:
14812 gcc_unreachable ();
14813 }
14814
14815 /* The remaining cases only occur when both comparisons are the
14816 same. */
14817 case NE:
14818 gcc_assert (cond1 == cond2);
14819 return CC_DNEmode;
14820
14821 case LE:
14822 gcc_assert (cond1 == cond2);
14823 return CC_DLEmode;
14824
14825 case GE:
14826 gcc_assert (cond1 == cond2);
14827 return CC_DGEmode;
14828
14829 case LEU:
14830 gcc_assert (cond1 == cond2);
14831 return CC_DLEUmode;
14832
14833 case GEU:
14834 gcc_assert (cond1 == cond2);
14835 return CC_DGEUmode;
14836
14837 default:
14838 gcc_unreachable ();
14839 }
14840 }
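
/* Worked example (for illustration): for a test such as (a == b || a <= b),
   COND_OR is DOM_CC_X_OR_Y, COND1 is EQ and COND2 is LE.  EQ dominates LE,
   so the function returns CC_DLEmode and a single comparison of a with b
   can feed both conditions.  */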
14841
14842 machine_mode
14843 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14844 {
14845 /* All floating point compares return CCFP if it is an equality
14846 comparison, and CCFPE otherwise. */
14847 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14848 {
14849 switch (op)
14850 {
14851 case EQ:
14852 case NE:
14853 case UNORDERED:
14854 case ORDERED:
14855 case UNLT:
14856 case UNLE:
14857 case UNGT:
14858 case UNGE:
14859 case UNEQ:
14860 case LTGT:
14861 return CCFPmode;
14862
14863 case LT:
14864 case LE:
14865 case GT:
14866 case GE:
14867 return CCFPEmode;
14868
14869 default:
14870 gcc_unreachable ();
14871 }
14872 }
14873
14874 /* A compare with a shifted operand. Because of canonicalization, the
14875 comparison will have to be swapped when we emit the assembler. */
14876 if (GET_MODE (y) == SImode
14877 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14878 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14879 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14880 || GET_CODE (x) == ROTATERT))
14881 return CC_SWPmode;
14882
14883 /* This operation is performed swapped, but since we only rely on the Z
14884 flag we don't need an additional mode. */
14885 if (GET_MODE (y) == SImode
14886 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14887 && GET_CODE (x) == NEG
14888 && (op == EQ || op == NE))
14889 return CC_Zmode;
14890
14891 /* This is a special case that is used by combine to allow a
14892 comparison of a shifted byte load to be split into a zero-extend
14893 followed by a comparison of the shifted integer (only valid for
14894 equalities and unsigned inequalities). */
14895 if (GET_MODE (x) == SImode
14896 && GET_CODE (x) == ASHIFT
14897 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14898 && GET_CODE (XEXP (x, 0)) == SUBREG
14899 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14900 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14901 && (op == EQ || op == NE
14902 || op == GEU || op == GTU || op == LTU || op == LEU)
14903 && CONST_INT_P (y))
14904 return CC_Zmode;
14905
14906 /* A construct for a conditional compare, if the false arm contains
14907 0, then both conditions must be true, otherwise either condition
14908 must be true. Not all conditions are possible, so CCmode is
14909 returned if it can't be done. */
14910 if (GET_CODE (x) == IF_THEN_ELSE
14911 && (XEXP (x, 2) == const0_rtx
14912 || XEXP (x, 2) == const1_rtx)
14913 && COMPARISON_P (XEXP (x, 0))
14914 && COMPARISON_P (XEXP (x, 1)))
14915 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14916 INTVAL (XEXP (x, 2)));
14917
14918 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14919 if (GET_CODE (x) == AND
14920 && (op == EQ || op == NE)
14921 && COMPARISON_P (XEXP (x, 0))
14922 && COMPARISON_P (XEXP (x, 1)))
14923 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14924 DOM_CC_X_AND_Y);
14925
14926 if (GET_CODE (x) == IOR
14927 && (op == EQ || op == NE)
14928 && COMPARISON_P (XEXP (x, 0))
14929 && COMPARISON_P (XEXP (x, 1)))
14930 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14931 DOM_CC_X_OR_Y);
14932
14933 /* An operation (on Thumb) where we want to test for a single bit.
14934 This is done by shifting that bit up into the top bit of a
14935 scratch register; we can then branch on the sign bit. */
14936 if (TARGET_THUMB1
14937 && GET_MODE (x) == SImode
14938 && (op == EQ || op == NE)
14939 && GET_CODE (x) == ZERO_EXTRACT
14940 && XEXP (x, 1) == const1_rtx)
14941 return CC_Nmode;
14942
14943 /* An operation that sets the condition codes as a side-effect, the
14944 V flag is not set correctly, so we can only use comparisons where
14945 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14946 instead.) */
14947 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14948 if (GET_MODE (x) == SImode
14949 && y == const0_rtx
14950 && (op == EQ || op == NE || op == LT || op == GE)
14951 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14952 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14953 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14954 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14955 || GET_CODE (x) == LSHIFTRT
14956 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14957 || GET_CODE (x) == ROTATERT
14958 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14959 return CC_NOOVmode;
14960
14961 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14962 return CC_Zmode;
14963
14964 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14965 && GET_CODE (x) == PLUS
14966 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14967 return CC_Cmode;
14968
14969 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14970 {
14971 switch (op)
14972 {
14973 case EQ:
14974 case NE:
14975 /* A DImode comparison against zero can be implemented by
14976 or'ing the two halves together. */
14977 if (y == const0_rtx)
14978 return CC_Zmode;
14979
14980 /* We can do an equality test in three Thumb instructions. */
14981 if (!TARGET_32BIT)
14982 return CC_Zmode;
14983
14984 /* FALLTHROUGH */
14985
14986 case LTU:
14987 case LEU:
14988 case GTU:
14989 case GEU:
14990 /* DImode unsigned comparisons can be implemented by cmp +
14991 cmpeq without a scratch register. Not worth doing in
14992 Thumb-2. */
14993 if (TARGET_32BIT)
14994 return CC_CZmode;
14995
14996 /* FALLTHROUGH */
14997
14998 case LT:
14999 case LE:
15000 case GT:
15001 case GE:
15002 /* DImode signed and unsigned comparisons can be implemented
15003 by cmp + sbcs with a scratch register, but that does not
15004 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15005 gcc_assert (op != EQ && op != NE);
15006 return CC_NCVmode;
15007
15008 default:
15009 gcc_unreachable ();
15010 }
15011 }
15012
15013 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15014 return GET_MODE (x);
15015
15016 return CCmode;
15017 }
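
/* Worked example (illustration only): for an unsigned overflow check such as
   (a + b < a), X is the PLUS and Y is one of its operands, so the
   (op == LTU || op == GEU) case above selects CC_Cmode and the result can be
   read straight from the carry flag set by the addition.  */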
15018
15019 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15020    return the rtx for the CC register in the proper mode.  SCRATCH is an SImode
15021    register needed by some DImode comparisons once reload has completed.  */
15022 rtx
15023 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15024 {
15025 machine_mode mode;
15026 rtx cc_reg;
15027 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15028
15029 /* We might have X as a constant, Y as a register because of the predicates
15030 used for cmpdi. If so, force X to a register here. */
15031 if (dimode_comparison && !REG_P (x))
15032 x = force_reg (DImode, x);
15033
15034 mode = SELECT_CC_MODE (code, x, y);
15035 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15036
15037 if (dimode_comparison
15038 && mode != CC_CZmode)
15039 {
15040 rtx clobber, set;
15041
15042 /* To compare two non-zero values for equality, XOR them and
15043 then compare against zero. Not used for ARM mode; there
15044 CC_CZmode is cheaper. */
15045 if (mode == CC_Zmode && y != const0_rtx)
15046 {
15047 gcc_assert (!reload_completed);
15048 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15049 y = const0_rtx;
15050 }
15051
15052 /* A scratch register is required. */
15053 if (reload_completed)
15054 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15055 else
15056 scratch = gen_rtx_SCRATCH (SImode);
15057
15058 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15059 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15060 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15061 }
15062 else
15063 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15064
15065 return cc_reg;
15066 }
15067
15068 /* Generate a sequence of insns that will generate the correct return
15069 address mask depending on the physical architecture that the program
15070 is running on. */
15071 rtx
15072 arm_gen_return_addr_mask (void)
15073 {
15074 rtx reg = gen_reg_rtx (Pmode);
15075
15076 emit_insn (gen_return_addr_mask (reg));
15077 return reg;
15078 }
15079
15080 void
15081 arm_reload_in_hi (rtx *operands)
15082 {
15083 rtx ref = operands[1];
15084 rtx base, scratch;
15085 HOST_WIDE_INT offset = 0;
15086
15087 if (GET_CODE (ref) == SUBREG)
15088 {
15089 offset = SUBREG_BYTE (ref);
15090 ref = SUBREG_REG (ref);
15091 }
15092
15093 if (REG_P (ref))
15094 {
15095 /* We have a pseudo which has been spilt onto the stack; there
15096 are two cases here: the first where there is a simple
15097 stack-slot replacement and a second where the stack-slot is
15098 out of range, or is used as a subreg. */
15099 if (reg_equiv_mem (REGNO (ref)))
15100 {
15101 ref = reg_equiv_mem (REGNO (ref));
15102 base = find_replacement (&XEXP (ref, 0));
15103 }
15104 else
15105 /* The slot is out of range, or was dressed up in a SUBREG. */
15106 base = reg_equiv_address (REGNO (ref));
15107
15108 /* PR 62554: If there is no equivalent memory location then just move
15109 the value as an SImode register move. This happens when the target
15110 architecture variant does not have an HImode register move. */
15111 if (base == NULL)
15112 {
15113 gcc_assert (REG_P (operands[0]));
15114 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15115 gen_rtx_SUBREG (SImode, ref, 0)));
15116 return;
15117 }
15118 }
15119 else
15120 base = find_replacement (&XEXP (ref, 0));
15121
15122 /* Handle the case where the address is too complex to be offset by 1. */
15123 if (GET_CODE (base) == MINUS
15124 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15125 {
15126 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15127
15128 emit_set_insn (base_plus, base);
15129 base = base_plus;
15130 }
15131 else if (GET_CODE (base) == PLUS)
15132 {
15133 /* The addend must be CONST_INT, or we would have dealt with it above. */
15134 HOST_WIDE_INT hi, lo;
15135
15136 offset += INTVAL (XEXP (base, 1));
15137 base = XEXP (base, 0);
15138
15139 /* Rework the address into a legal sequence of insns. */
15140 /* Valid range for lo is -4095 -> 4095 */
15141 lo = (offset >= 0
15142 ? (offset & 0xfff)
15143 : -((-offset) & 0xfff));
15144
15145 /* Corner case, if lo is the max offset then we would be out of range
15146 once we have added the additional 1 below, so bump the msb into the
15147 pre-loading insn(s). */
15148 if (lo == 4095)
15149 lo &= 0x7ff;
15150
15151 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15152 ^ (HOST_WIDE_INT) 0x80000000)
15153 - (HOST_WIDE_INT) 0x80000000);
15154
15155 gcc_assert (hi + lo == offset);
15156
15157 if (hi != 0)
15158 {
15159 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15160
15161 /* Get the base address; addsi3 knows how to handle constants
15162 that require more than one insn. */
15163 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15164 base = base_plus;
15165 offset = lo;
15166 }
15167 }
15168
15169 /* Operands[2] may overlap operands[0] (though it won't overlap
15170 operands[1]), that's why we asked for a DImode reg -- so we can
15171      use the half that does not overlap.  */
15172 if (REGNO (operands[2]) == REGNO (operands[0]))
15173 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15174 else
15175 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15176
15177 emit_insn (gen_zero_extendqisi2 (scratch,
15178 gen_rtx_MEM (QImode,
15179 plus_constant (Pmode, base,
15180 offset))));
15181 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15182 gen_rtx_MEM (QImode,
15183 plus_constant (Pmode, base,
15184 offset + 1))));
15185 if (!BYTES_BIG_ENDIAN)
15186 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15187 gen_rtx_IOR (SImode,
15188 gen_rtx_ASHIFT
15189 (SImode,
15190 gen_rtx_SUBREG (SImode, operands[0], 0),
15191 GEN_INT (8)),
15192 scratch));
15193 else
15194 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15195 gen_rtx_IOR (SImode,
15196 gen_rtx_ASHIFT (SImode, scratch,
15197 GEN_INT (8)),
15198 gen_rtx_SUBREG (SImode, operands[0], 0)));
15199 }
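
/* Worked example of the offset splitting above (illustration only): for
   OFFSET == 4095 we get LO == 0x7ff (2047) and HI == 2048, so HI is added to
   the base with addsi3 and both the byte at LO and the byte at LO + 1 stay
   within the legal load offset range; for OFFSET == 0x1234 the split is
   simply LO == 0x234, HI == 0x1000.  */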
15200
15201 /* Handle storing a half-word to memory during reload by synthesizing it as two
15202 byte stores. Take care not to clobber the input values until after we
15203 have moved them somewhere safe. This code assumes that if the DImode
15204 scratch in operands[2] overlaps either the input value or output address
15205 in some way, then that value must die in this insn (we absolutely need
15206 two scratch registers for some corner cases). */
15207 void
15208 arm_reload_out_hi (rtx *operands)
15209 {
15210 rtx ref = operands[0];
15211 rtx outval = operands[1];
15212 rtx base, scratch;
15213 HOST_WIDE_INT offset = 0;
15214
15215 if (GET_CODE (ref) == SUBREG)
15216 {
15217 offset = SUBREG_BYTE (ref);
15218 ref = SUBREG_REG (ref);
15219 }
15220
15221 if (REG_P (ref))
15222 {
15223 /* We have a pseudo which has been spilt onto the stack; there
15224 are two cases here: the first where there is a simple
15225 stack-slot replacement and a second where the stack-slot is
15226 out of range, or is used as a subreg. */
15227 if (reg_equiv_mem (REGNO (ref)))
15228 {
15229 ref = reg_equiv_mem (REGNO (ref));
15230 base = find_replacement (&XEXP (ref, 0));
15231 }
15232 else
15233 /* The slot is out of range, or was dressed up in a SUBREG. */
15234 base = reg_equiv_address (REGNO (ref));
15235
15236 /* PR 62254: If there is no equivalent memory location then just move
15237 the value as an SImode register move. This happens when the target
15238 architecture variant does not have an HImode register move. */
15239 if (base == NULL)
15240 {
15241 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15242
15243 if (REG_P (outval))
15244 {
15245 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15246 gen_rtx_SUBREG (SImode, outval, 0)));
15247 }
15248 else /* SUBREG_P (outval) */
15249 {
15250 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15251 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15252 SUBREG_REG (outval)));
15253 else
15254 /* FIXME: Handle other cases ? */
15255 gcc_unreachable ();
15256 }
15257 return;
15258 }
15259 }
15260 else
15261 base = find_replacement (&XEXP (ref, 0));
15262
15263 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15264
15265 /* Handle the case where the address is too complex to be offset by 1. */
15266 if (GET_CODE (base) == MINUS
15267 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15268 {
15269 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15270
15271 /* Be careful not to destroy OUTVAL. */
15272 if (reg_overlap_mentioned_p (base_plus, outval))
15273 {
15274 /* Updating base_plus might destroy outval, see if we can
15275 swap the scratch and base_plus. */
15276 if (!reg_overlap_mentioned_p (scratch, outval))
15277 std::swap (scratch, base_plus);
15278 else
15279 {
15280 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15281
15282 /* Be conservative and copy OUTVAL into the scratch now,
15283 this should only be necessary if outval is a subreg
15284 of something larger than a word. */
15285 /* XXX Might this clobber base? I can't see how it can,
15286 since scratch is known to overlap with OUTVAL, and
15287 must be wider than a word. */
15288 emit_insn (gen_movhi (scratch_hi, outval));
15289 outval = scratch_hi;
15290 }
15291 }
15292
15293 emit_set_insn (base_plus, base);
15294 base = base_plus;
15295 }
15296 else if (GET_CODE (base) == PLUS)
15297 {
15298 /* The addend must be CONST_INT, or we would have dealt with it above. */
15299 HOST_WIDE_INT hi, lo;
15300
15301 offset += INTVAL (XEXP (base, 1));
15302 base = XEXP (base, 0);
15303
15304 /* Rework the address into a legal sequence of insns. */
15305 /* Valid range for lo is -4095 -> 4095 */
15306 lo = (offset >= 0
15307 ? (offset & 0xfff)
15308 : -((-offset) & 0xfff));
15309
15310 /* Corner case, if lo is the max offset then we would be out of range
15311 once we have added the additional 1 below, so bump the msb into the
15312 pre-loading insn(s). */
15313 if (lo == 4095)
15314 lo &= 0x7ff;
15315
15316 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15317 ^ (HOST_WIDE_INT) 0x80000000)
15318 - (HOST_WIDE_INT) 0x80000000);
15319
15320 gcc_assert (hi + lo == offset);
15321
15322 if (hi != 0)
15323 {
15324 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15325
15326 /* Be careful not to destroy OUTVAL. */
15327 if (reg_overlap_mentioned_p (base_plus, outval))
15328 {
15329 /* Updating base_plus might destroy outval, see if we
15330 can swap the scratch and base_plus. */
15331 if (!reg_overlap_mentioned_p (scratch, outval))
15332 std::swap (scratch, base_plus);
15333 else
15334 {
15335 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15336
15337 /* Be conservative and copy outval into scratch now,
15338 this should only be necessary if outval is a
15339 subreg of something larger than a word. */
15340 /* XXX Might this clobber base? I can't see how it
15341 can, since scratch is known to overlap with
15342 outval. */
15343 emit_insn (gen_movhi (scratch_hi, outval));
15344 outval = scratch_hi;
15345 }
15346 }
15347
15348 /* Get the base address; addsi3 knows how to handle constants
15349 that require more than one insn. */
15350 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15351 base = base_plus;
15352 offset = lo;
15353 }
15354 }
15355
15356 if (BYTES_BIG_ENDIAN)
15357 {
15358 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15359 plus_constant (Pmode, base,
15360 offset + 1)),
15361 gen_lowpart (QImode, outval)));
15362 emit_insn (gen_lshrsi3 (scratch,
15363 gen_rtx_SUBREG (SImode, outval, 0),
15364 GEN_INT (8)));
15365 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15366 offset)),
15367 gen_lowpart (QImode, scratch)));
15368 }
15369 else
15370 {
15371 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15372 offset)),
15373 gen_lowpart (QImode, outval)));
15374 emit_insn (gen_lshrsi3 (scratch,
15375 gen_rtx_SUBREG (SImode, outval, 0),
15376 GEN_INT (8)));
15377 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15378 plus_constant (Pmode, base,
15379 offset + 1)),
15380 gen_lowpart (QImode, scratch)));
15381 }
15382 }
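
/* Illustrative sketch only: on a little-endian target the function above
   turns a reloaded halfword store into something along the lines of

	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]

   with the two byte addresses exchanged on big-endian targets.  */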
15383
15384 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15385 (padded to the size of a word) should be passed in a register. */
15386
15387 static bool
15388 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15389 {
15390 if (TARGET_AAPCS_BASED)
15391 return must_pass_in_stack_var_size (mode, type);
15392 else
15393 return must_pass_in_stack_var_size_or_pad (mode, type);
15394 }
15395
15396
15397 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15398 byte of a stack argument has useful data. For legacy APCS ABIs we use
15399 the default. For AAPCS based ABIs small aggregate types are placed
15400 in the lowest memory address. */
15401
15402 static pad_direction
15403 arm_function_arg_padding (machine_mode mode, const_tree type)
15404 {
15405 if (!TARGET_AAPCS_BASED)
15406 return default_function_arg_padding (mode, type);
15407
15408 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15409 return PAD_DOWNWARD;
15410
15411 return PAD_UPWARD;
15412 }
15413
15414
15415 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15416 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15417 register has useful data, and return the opposite if the most
15418 significant byte does. */
15419
15420 bool
15421 arm_pad_reg_upward (machine_mode mode,
15422 tree type, int first ATTRIBUTE_UNUSED)
15423 {
15424 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15425 {
15426 /* For AAPCS, small aggregates, small fixed-point types,
15427 and small complex types are always padded upwards. */
15428 if (type)
15429 {
15430 if ((AGGREGATE_TYPE_P (type)
15431 || TREE_CODE (type) == COMPLEX_TYPE
15432 || FIXED_POINT_TYPE_P (type))
15433 && int_size_in_bytes (type) <= 4)
15434 return true;
15435 }
15436 else
15437 {
15438 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15439 && GET_MODE_SIZE (mode) <= 4)
15440 return true;
15441 }
15442 }
15443
15444 /* Otherwise, use default padding. */
15445 return !BYTES_BIG_ENDIAN;
15446 }
15447
15448 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15449 assuming that the address in the base register is word aligned. */
15450 bool
15451 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15452 {
15453 HOST_WIDE_INT max_offset;
15454
15455   /* The offset must be a multiple of 4 in Thumb-2 mode.  */
15456 if (TARGET_THUMB2 && ((offset & 3) != 0))
15457 return false;
15458
15459 if (TARGET_THUMB2)
15460 max_offset = 1020;
15461 else if (TARGET_ARM)
15462 max_offset = 255;
15463 else
15464 return false;
15465
15466 return ((offset <= max_offset) && (offset >= -max_offset));
15467 }
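
/* For example (illustration only): an offset of -255 is accepted in ARM
   state but rejected in Thumb-2 (not a multiple of 4), while an offset of
   1020 is accepted in Thumb-2 but rejected in ARM state (outside the
   +/-255 range).  */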
15468
15469 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15470 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15471 Assumes that the address in the base register RN is word aligned. Pattern
15472    guarantees that both memory accesses use the same base register, that the
15473    offsets are constants within range, and that the gap between the offsets is 4.
15474    Once reload has completed, check also that the registers are legal.  WBACK
15475    indicates whether the address is updated; LOAD whether the access is a load or a store.  */
15476 bool
15477 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15478 bool wback, bool load)
15479 {
15480 unsigned int t, t2, n;
15481
15482 if (!reload_completed)
15483 return true;
15484
15485 if (!offset_ok_for_ldrd_strd (offset))
15486 return false;
15487
15488 t = REGNO (rt);
15489 t2 = REGNO (rt2);
15490 n = REGNO (rn);
15491
15492 if ((TARGET_THUMB2)
15493 && ((wback && (n == t || n == t2))
15494 || (t == SP_REGNUM)
15495 || (t == PC_REGNUM)
15496 || (t2 == SP_REGNUM)
15497 || (t2 == PC_REGNUM)
15498 || (!load && (n == PC_REGNUM))
15499 || (load && (t == t2))
15500 /* Triggers Cortex-M3 LDRD errata. */
15501 || (!wback && load && fix_cm3_ldrd && (n == t))))
15502 return false;
15503
15504 if ((TARGET_ARM)
15505 && ((wback && (n == t || n == t2))
15506 || (t2 == PC_REGNUM)
15507 || (t % 2 != 0) /* First destination register is not even. */
15508 || (t2 != t + 1)
15509 /* PC can be used as base register (for offset addressing only),
15510 	  but it is deprecated.  */
15511 || (n == PC_REGNUM)))
15512 return false;
15513
15514 return true;
15515 }
15516
15517 /* Return true if a 64-bit access with alignment ALIGN and with a
15518 constant offset OFFSET from the base pointer is permitted on this
15519 architecture. */
15520 static bool
15521 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15522 {
15523 return (unaligned_access
15524 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15525 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15526 }
15527
15528 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15529 operand MEM's address contains an immediate offset from the base
15530 register and has no side effects, in which case it sets BASE,
15531 OFFSET and ALIGN accordingly. */
15532 static bool
15533 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15534 {
15535 rtx addr;
15536
15537 gcc_assert (base != NULL && offset != NULL);
15538
15539 /* TODO: Handle more general memory operand patterns, such as
15540 PRE_DEC and PRE_INC. */
15541
15542 if (side_effects_p (mem))
15543 return false;
15544
15545 /* Can't deal with subregs. */
15546 if (GET_CODE (mem) == SUBREG)
15547 return false;
15548
15549 gcc_assert (MEM_P (mem));
15550
15551 *offset = const0_rtx;
15552 *align = MEM_ALIGN (mem);
15553
15554 addr = XEXP (mem, 0);
15555
15556 /* If addr isn't valid for DImode, then we can't handle it. */
15557 if (!arm_legitimate_address_p (DImode, addr,
15558 reload_in_progress || reload_completed))
15559 return false;
15560
15561 if (REG_P (addr))
15562 {
15563 *base = addr;
15564 return true;
15565 }
15566 else if (GET_CODE (addr) == PLUS)
15567 {
15568 *base = XEXP (addr, 0);
15569 *offset = XEXP (addr, 1);
15570 return (REG_P (*base) && CONST_INT_P (*offset));
15571 }
15572
15573 return false;
15574 }
15575
15576 /* Called from a peephole2 to replace two word-size accesses with a
15577 single LDRD/STRD instruction. Returns true iff we can generate a
15578 new instruction sequence. That is, both accesses use the same base
15579 register and the gap between constant offsets is 4. This function
15580 may reorder its operands to match ldrd/strd RTL templates.
15581 OPERANDS are the operands found by the peephole matcher;
15582 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15583    corresponding memory operands.  LOAD indicates whether the access
15584    is a load or a store.  CONST_STORE indicates a store of constant
15585    integer values held in OPERANDS[4,5] and assumes that the pattern
15586    is four insns long, for the purpose of checking dead registers.
15587 COMMUTE indicates that register operands may be reordered. */
15588 bool
15589 gen_operands_ldrd_strd (rtx *operands, bool load,
15590 bool const_store, bool commute)
15591 {
15592 int nops = 2;
15593 HOST_WIDE_INT offsets[2], offset, align[2];
15594 rtx base = NULL_RTX;
15595 rtx cur_base, cur_offset, tmp;
15596 int i, gap;
15597 HARD_REG_SET regset;
15598
15599 gcc_assert (!const_store || !load);
15600 /* Check that the memory references are immediate offsets from the
15601 same base register. Extract the base register, the destination
15602 registers, and the corresponding memory offsets. */
15603 for (i = 0; i < nops; i++)
15604 {
15605 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15606 &align[i]))
15607 return false;
15608
15609 if (i == 0)
15610 base = cur_base;
15611 else if (REGNO (base) != REGNO (cur_base))
15612 return false;
15613
15614 offsets[i] = INTVAL (cur_offset);
15615 if (GET_CODE (operands[i]) == SUBREG)
15616 {
15617 tmp = SUBREG_REG (operands[i]);
15618 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15619 operands[i] = tmp;
15620 }
15621 }
15622
15623 /* Make sure there is no dependency between the individual loads. */
15624 if (load && REGNO (operands[0]) == REGNO (base))
15625 return false; /* RAW */
15626
15627 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15628 return false; /* WAW */
15629
15630 /* If the same input register is used in both stores
15631 when storing different constants, try to find a free register.
15632 For example, the code
15633 mov r0, 0
15634 str r0, [r2]
15635 mov r0, 1
15636 str r0, [r2, #4]
15637 can be transformed into
15638 mov r1, 0
15639 mov r0, 1
15640 strd r1, r0, [r2]
15641 in Thumb mode assuming that r1 is free.
15642 For ARM mode do the same but only if the starting register
15643 can be made to be even. */
15644 if (const_store
15645 && REGNO (operands[0]) == REGNO (operands[1])
15646 && INTVAL (operands[4]) != INTVAL (operands[5]))
15647 {
15648 if (TARGET_THUMB2)
15649 {
15650 CLEAR_HARD_REG_SET (regset);
15651 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15652 if (tmp == NULL_RTX)
15653 return false;
15654
15655 /* Use the new register in the first load to ensure that
15656 if the original input register is not dead after peephole,
15657 then it will have the correct constant value. */
15658 operands[0] = tmp;
15659 }
15660 else if (TARGET_ARM)
15661 {
15662 int regno = REGNO (operands[0]);
15663 if (!peep2_reg_dead_p (4, operands[0]))
15664 {
15665 /* When the input register is even and is not dead after the
15666 pattern, it has to hold the second constant but we cannot
15667 form a legal STRD in ARM mode with this register as the second
15668 register. */
15669 if (regno % 2 == 0)
15670 return false;
15671
15672 /* Is regno-1 free? */
15673 SET_HARD_REG_SET (regset);
15674 CLEAR_HARD_REG_BIT(regset, regno - 1);
15675 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15676 if (tmp == NULL_RTX)
15677 return false;
15678
15679 operands[0] = tmp;
15680 }
15681 else
15682 {
15683 /* Find a DImode register. */
15684 CLEAR_HARD_REG_SET (regset);
15685 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15686 if (tmp != NULL_RTX)
15687 {
15688 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15689 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15690 }
15691 else
15692 {
15693 /* Can we use the input register to form a DI register? */
15694 SET_HARD_REG_SET (regset);
15695 CLEAR_HARD_REG_BIT(regset,
15696 regno % 2 == 0 ? regno + 1 : regno - 1);
15697 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15698 if (tmp == NULL_RTX)
15699 return false;
15700 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15701 }
15702 }
15703
15704 gcc_assert (operands[0] != NULL_RTX);
15705 gcc_assert (operands[1] != NULL_RTX);
15706 gcc_assert (REGNO (operands[0]) % 2 == 0);
15707 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15708 }
15709 }
15710
15711 /* Make sure the instructions are ordered with lower memory access first. */
15712 if (offsets[0] > offsets[1])
15713 {
15714 gap = offsets[0] - offsets[1];
15715 offset = offsets[1];
15716
15717 /* Swap the instructions such that lower memory is accessed first. */
15718 std::swap (operands[0], operands[1]);
15719 std::swap (operands[2], operands[3]);
15720 std::swap (align[0], align[1]);
15721 if (const_store)
15722 std::swap (operands[4], operands[5]);
15723 }
15724 else
15725 {
15726 gap = offsets[1] - offsets[0];
15727 offset = offsets[0];
15728 }
15729
15730 /* Make sure accesses are to consecutive memory locations. */
15731 if (gap != GET_MODE_SIZE (SImode))
15732 return false;
15733
15734 if (!align_ok_ldrd_strd (align[0], offset))
15735 return false;
15736
15737 /* Make sure we generate legal instructions. */
15738 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15739 false, load))
15740 return true;
15741
15742   /* In Thumb state the registers are almost unconstrained, so if the check
15743      above failed there is little hope of fixing it with different registers.  */
15744 if (TARGET_THUMB2)
15745 return false;
15746
15747 if (load && commute)
15748 {
15749 /* Try reordering registers. */
15750 std::swap (operands[0], operands[1]);
15751 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15752 false, load))
15753 return true;
15754 }
15755
15756 if (const_store)
15757 {
15758 /* If input registers are dead after this pattern, they can be
15759 reordered or replaced by other registers that are free in the
15760 current pattern. */
15761 if (!peep2_reg_dead_p (4, operands[0])
15762 || !peep2_reg_dead_p (4, operands[1]))
15763 return false;
15764
15765 /* Try to reorder the input registers. */
15766 /* For example, the code
15767 mov r0, 0
15768 mov r1, 1
15769 str r1, [r2]
15770 str r0, [r2, #4]
15771 can be transformed into
15772 mov r1, 0
15773 mov r0, 1
15774 strd r0, [r2]
15775 */
15776 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15777 false, false))
15778 {
15779 std::swap (operands[0], operands[1]);
15780 return true;
15781 }
15782
15783 /* Try to find a free DI register. */
15784 CLEAR_HARD_REG_SET (regset);
15785 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15786 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15787 while (true)
15788 {
15789 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15790 if (tmp == NULL_RTX)
15791 return false;
15792
15793 /* DREG must be an even-numbered register in DImode.
15794 Split it into SI registers. */
15795 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15796 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15797 gcc_assert (operands[0] != NULL_RTX);
15798 gcc_assert (operands[1] != NULL_RTX);
15799 gcc_assert (REGNO (operands[0]) % 2 == 0);
15800 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15801
15802 return (operands_ok_ldrd_strd (operands[0], operands[1],
15803 base, offset,
15804 false, load));
15805 }
15806 }
15807
15808 return false;
15809 }
15810
15811
15812 /* Return true if parallel execution of the two word-size accesses provided
15813 could be satisfied with a single LDRD/STRD instruction. Two word-size
15814 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
15815 register operands and OPERANDS[2,3] are the corresponding memory operands.
15816 */
15817 bool
15818 valid_operands_ldrd_strd (rtx *operands, bool load)
15819 {
15820 int nops = 2;
15821 HOST_WIDE_INT offsets[2], offset, align[2];
15822 rtx base = NULL_RTX;
15823 rtx cur_base, cur_offset;
15824 int i, gap;
15825
15826 /* Check that the memory references are immediate offsets from the
15827 same base register. Extract the base register, the destination
15828 registers, and the corresponding memory offsets. */
15829 for (i = 0; i < nops; i++)
15830 {
15831 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15832 &align[i]))
15833 return false;
15834
15835 if (i == 0)
15836 base = cur_base;
15837 else if (REGNO (base) != REGNO (cur_base))
15838 return false;
15839
15840 offsets[i] = INTVAL (cur_offset);
15841 if (GET_CODE (operands[i]) == SUBREG)
15842 return false;
15843 }
15844
15845 if (offsets[0] > offsets[1])
15846 return false;
15847
15848 gap = offsets[1] - offsets[0];
15849 offset = offsets[0];
15850
15851 /* Make sure accesses are to consecutive memory locations. */
15852 if (gap != GET_MODE_SIZE (SImode))
15853 return false;
15854
15855 if (!align_ok_ldrd_strd (align[0], offset))
15856 return false;
15857
15858 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15859 false, load);
15860 }
15861
15862 \f
15863 /* Print a symbolic form of X to the debug file, F. */
15864 static void
15865 arm_print_value (FILE *f, rtx x)
15866 {
15867 switch (GET_CODE (x))
15868 {
15869 case CONST_INT:
15870 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15871 return;
15872
15873 case CONST_DOUBLE:
15874 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15875 return;
15876
15877 case CONST_VECTOR:
15878 {
15879 int i;
15880
15881 fprintf (f, "<");
15882 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15883 {
15884 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15885 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15886 fputc (',', f);
15887 }
15888 fprintf (f, ">");
15889 }
15890 return;
15891
15892 case CONST_STRING:
15893 fprintf (f, "\"%s\"", XSTR (x, 0));
15894 return;
15895
15896 case SYMBOL_REF:
15897 fprintf (f, "`%s'", XSTR (x, 0));
15898 return;
15899
15900 case LABEL_REF:
15901 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15902 return;
15903
15904 case CONST:
15905 arm_print_value (f, XEXP (x, 0));
15906 return;
15907
15908 case PLUS:
15909 arm_print_value (f, XEXP (x, 0));
15910 fprintf (f, "+");
15911 arm_print_value (f, XEXP (x, 1));
15912 return;
15913
15914 case PC:
15915 fprintf (f, "pc");
15916 return;
15917
15918 default:
15919 fprintf (f, "????");
15920 return;
15921 }
15922 }
15923 \f
15924 /* Routines for manipulation of the constant pool. */
15925
15926 /* Arm instructions cannot load a large constant directly into a
15927 register; they have to come from a pc relative load. The constant
15928 must therefore be placed in the addressable range of the pc
15929 relative load. Depending on the precise pc relative load
15930 instruction the range is somewhere between 256 bytes and 4k. This
15931 means that we often have to dump a constant inside a function, and
15932 generate code to branch around it.
15933
15934 It is important to minimize this, since the branches will slow
15935 things down and make the code larger.
15936
15937 Normally we can hide the table after an existing unconditional
15938 branch so that there is no interruption of the flow, but in the
15939 worst case the code looks like this:
15940
15941 ldr rn, L1
15942 ...
15943 b L2
15944 align
15945 L1: .long value
15946 L2:
15947 ...
15948
15949 ldr rn, L3
15950 ...
15951 b L4
15952 align
15953 L3: .long value
15954 L4:
15955 ...
15956
15957 We fix this by performing a scan after scheduling, which notices
15958 which instructions need to have their operands fetched from the
15959 constant table and builds the table.
15960
15961 The algorithm starts by building a table of all the constants that
15962 need fixing up and all the natural barriers in the function (places
15963 where a constant table can be dropped without breaking the flow).
15964 For each fixup we note how far the pc-relative replacement will be
15965 able to reach and the offset of the instruction into the function.
15966
15967 Having built the table we then group the fixes together to form
15968 tables that are as large as possible (subject to addressing
15969 constraints) and emit each table of constants after the last
15970 barrier that is within range of all the instructions in the group.
15971 If a group does not contain a barrier, then we forcibly create one
15972 by inserting a jump instruction into the flow. Once the table has
15973 been inserted, the insns are then modified to reference the
15974 relevant entry in the pool.
15975
15976 Possible enhancements to the algorithm (not implemented) are:
15977
15978 1) For some processors and object formats, there may be benefit in
15979 aligning the pools to the start of cache lines; this alignment
15980 would need to be taken into account when calculating addressability
15981 of a pool. */
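
/* Illustrative sketch, not part of the implementation: suppose an SImode
   load at offset 0x1000 has a forward pool range of about 4 KB and a
   backward range of 250 bytes (the real values come from the pool_range
   and neg_pool_range insn attributes; these numbers are only examples).
   Its constant can then live anywhere in roughly
   [0x1000 - 250, 0x1000 + 4096 - minipool_pad], and the grouping step
   described above intersects such windows for all pending fixes and
   dumps the pool after the last barrier inside the intersection.  */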
15982
15983 /* These typedefs are located at the start of this file, so that
15984 they can be used in the prototypes there. This comment is to
15985 remind readers of that fact so that the following structures
15986 can be understood more easily.
15987
15988 typedef struct minipool_node Mnode;
15989 typedef struct minipool_fixup Mfix; */
15990
15991 struct minipool_node
15992 {
15993 /* Doubly linked chain of entries. */
15994 Mnode * next;
15995 Mnode * prev;
15996 /* The maximum offset into the code at which this entry can be placed. While
15997 pushing fixes for forward references, all entries are sorted in order
15998 of increasing max_address. */
15999 HOST_WIDE_INT max_address;
16000 /* Similarly for an entry inserted for a backwards ref. */
16001 HOST_WIDE_INT min_address;
16002 /* The number of fixes referencing this entry. This can become zero
16003 if we "unpush" an entry. In this case we ignore the entry when we
16004 come to emit the code. */
16005 int refcount;
16006 /* The offset from the start of the minipool. */
16007 HOST_WIDE_INT offset;
16008 /* The value in the table. */
16009 rtx value;
16010 /* The mode of value. */
16011 machine_mode mode;
16012 /* The size of the value. With iWMMXt enabled
16013 sizes > 4 also imply an alignment of 8 bytes. */
16014 int fix_size;
16015 };
16016
16017 struct minipool_fixup
16018 {
16019 Mfix * next;
16020 rtx_insn * insn;
16021 HOST_WIDE_INT address;
16022 rtx * loc;
16023 machine_mode mode;
16024 int fix_size;
16025 rtx value;
16026 Mnode * minipool;
16027 HOST_WIDE_INT forwards;
16028 HOST_WIDE_INT backwards;
16029 };
16030
16031 /* Fixes less than a word need padding out to a word boundary. */
16032 #define MINIPOOL_FIX_SIZE(mode) \
16033 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
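
/* For example (illustration only): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, since sub-word
   constants are padded to a full word, while MINIPOOL_FIX_SIZE (DImode)
   is 8 and MINIPOOL_FIX_SIZE (TImode) is 16.  */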
16034
16035 static Mnode * minipool_vector_head;
16036 static Mnode * minipool_vector_tail;
16037 static rtx_code_label *minipool_vector_label;
16038 static int minipool_pad;
16039
16040 /* The linked list of all minipool fixes required for this function. */
16041 Mfix * minipool_fix_head;
16042 Mfix * minipool_fix_tail;
16043 /* The fix entry for the current minipool, once it has been placed. */
16044 Mfix * minipool_barrier;
16045
16046 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16047 #define JUMP_TABLES_IN_TEXT_SECTION 0
16048 #endif
16049
16050 static HOST_WIDE_INT
16051 get_jump_table_size (rtx_jump_table_data *insn)
16052 {
16053 /* ADDR_VECs only take room if read-only data goes into the text
16054 section. */
16055 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16056 {
16057 rtx body = PATTERN (insn);
16058 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16059 HOST_WIDE_INT size;
16060 HOST_WIDE_INT modesize;
16061
16062 modesize = GET_MODE_SIZE (GET_MODE (body));
16063 size = modesize * XVECLEN (body, elt);
16064 switch (modesize)
16065 {
16066 case 1:
16067 /* Round up size of TBB table to a halfword boundary. */
16068 size = (size + 1) & ~HOST_WIDE_INT_1;
16069 break;
16070 case 2:
16071 /* No padding necessary for TBH. */
16072 break;
16073 case 4:
16074 /* Add two bytes for alignment on Thumb. */
16075 if (TARGET_THUMB)
16076 size += 2;
16077 break;
16078 default:
16079 gcc_unreachable ();
16080 }
16081 return size;
16082 }
16083
16084 return 0;
16085 }
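
/* Worked example, illustration only, assuming the tables end up in the
   text section: a TBB-style ADDR_DIFF_VEC in QImode with 5 entries takes
   5 bytes and is rounded up to 6 to keep the following code halfword
   aligned; a TBH table (HImode) with 5 entries takes exactly 10 bytes;
   a 5-entry SImode table on Thumb takes 20 bytes plus 2 bytes of
   alignment padding.  */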
16086
16087 /* Return the maximum amount of padding that will be inserted before
16088 label LABEL. */
16089
16090 static HOST_WIDE_INT
16091 get_label_padding (rtx label)
16092 {
16093 HOST_WIDE_INT align, min_insn_size;
16094
16095 align = 1 << label_to_alignment (label).levels[0].log;
16096 min_insn_size = TARGET_THUMB ? 2 : 4;
16097 return align > min_insn_size ? align - min_insn_size : 0;
16098 }
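
/* For instance (illustration only): a label aligned to 8 bytes on Thumb,
   where the minimum instruction size is 2, can be preceded by up to
   8 - 2 = 6 bytes of padding; on ARM, with a minimum instruction size
   of 4, the worst case for the same alignment is 4 bytes.  */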
16099
16100 /* Move a minipool fix MP from its current location to before MAX_MP.
16101 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16102 constraints may need updating. */
16103 static Mnode *
16104 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16105 HOST_WIDE_INT max_address)
16106 {
16107 /* The code below assumes these are different. */
16108 gcc_assert (mp != max_mp);
16109
16110 if (max_mp == NULL)
16111 {
16112 if (max_address < mp->max_address)
16113 mp->max_address = max_address;
16114 }
16115 else
16116 {
16117 if (max_address > max_mp->max_address - mp->fix_size)
16118 mp->max_address = max_mp->max_address - mp->fix_size;
16119 else
16120 mp->max_address = max_address;
16121
16122 /* Unlink MP from its current position. Since max_mp is non-null,
16123 mp->prev must be non-null. */
16124 mp->prev->next = mp->next;
16125 if (mp->next != NULL)
16126 mp->next->prev = mp->prev;
16127 else
16128 minipool_vector_tail = mp->prev;
16129
16130 /* Re-insert it before MAX_MP. */
16131 mp->next = max_mp;
16132 mp->prev = max_mp->prev;
16133 max_mp->prev = mp;
16134
16135 if (mp->prev != NULL)
16136 mp->prev->next = mp;
16137 else
16138 minipool_vector_head = mp;
16139 }
16140
16141 /* Save the new entry. */
16142 max_mp = mp;
16143
16144 /* Scan over the preceding entries and adjust their addresses as
16145 required. */
16146 while (mp->prev != NULL
16147 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16148 {
16149 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16150 mp = mp->prev;
16151 }
16152
16153 return max_mp;
16154 }
16155
16156 /* Add a constant to the minipool for a forward reference. Returns the
16157 node added or NULL if the constant will not fit in this pool. */
16158 static Mnode *
16159 add_minipool_forward_ref (Mfix *fix)
16160 {
16161 /* If set, max_mp is the first pool_entry that has a lower
16162 constraint than the one we are trying to add. */
16163 Mnode * max_mp = NULL;
16164 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16165 Mnode * mp;
16166
16167 /* If the minipool starts before the end of FIX->INSN then this FIX
16168 cannot be placed into the current pool. Furthermore, adding the
16169 new constant pool entry may cause the pool to start FIX_SIZE bytes
16170 earlier. */
16171 if (minipool_vector_head &&
16172 (fix->address + get_attr_length (fix->insn)
16173 >= minipool_vector_head->max_address - fix->fix_size))
16174 return NULL;
16175
16176 /* Scan the pool to see if a constant with the same value has
16177 already been added. While we are doing this, also note the
16178 location where we must insert the constant if it doesn't already
16179 exist. */
16180 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16181 {
16182 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16183 && fix->mode == mp->mode
16184 && (!LABEL_P (fix->value)
16185 || (CODE_LABEL_NUMBER (fix->value)
16186 == CODE_LABEL_NUMBER (mp->value)))
16187 && rtx_equal_p (fix->value, mp->value))
16188 {
16189 /* More than one fix references this entry. */
16190 mp->refcount++;
16191 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16192 }
16193
16194 /* Note the insertion point if necessary. */
16195 if (max_mp == NULL
16196 && mp->max_address > max_address)
16197 max_mp = mp;
16198
16199 /* If we are inserting an 8-byte aligned quantity and
16200 we have not already found an insertion point, then
16201 make sure that all such 8-byte aligned quantities are
16202 placed at the start of the pool. */
16203 if (ARM_DOUBLEWORD_ALIGN
16204 && max_mp == NULL
16205 && fix->fix_size >= 8
16206 && mp->fix_size < 8)
16207 {
16208 max_mp = mp;
16209 max_address = mp->max_address;
16210 }
16211 }
16212
16213 /* The value is not currently in the minipool, so we need to create
16214 a new entry for it. If MAX_MP is NULL, the entry will be put on
16215 the end of the list since the placement is less constrained than
16216 any existing entry. Otherwise, we insert the new fix before
16217 MAX_MP and, if necessary, adjust the constraints on the other
16218 entries. */
16219 mp = XNEW (Mnode);
16220 mp->fix_size = fix->fix_size;
16221 mp->mode = fix->mode;
16222 mp->value = fix->value;
16223 mp->refcount = 1;
16224 /* Not yet required for a backwards ref. */
16225 mp->min_address = -65536;
16226
16227 if (max_mp == NULL)
16228 {
16229 mp->max_address = max_address;
16230 mp->next = NULL;
16231 mp->prev = minipool_vector_tail;
16232
16233 if (mp->prev == NULL)
16234 {
16235 minipool_vector_head = mp;
16236 minipool_vector_label = gen_label_rtx ();
16237 }
16238 else
16239 mp->prev->next = mp;
16240
16241 minipool_vector_tail = mp;
16242 }
16243 else
16244 {
16245 if (max_address > max_mp->max_address - mp->fix_size)
16246 mp->max_address = max_mp->max_address - mp->fix_size;
16247 else
16248 mp->max_address = max_address;
16249
16250 mp->next = max_mp;
16251 mp->prev = max_mp->prev;
16252 max_mp->prev = mp;
16253 if (mp->prev != NULL)
16254 mp->prev->next = mp;
16255 else
16256 minipool_vector_head = mp;
16257 }
16258
16259 /* Save the new entry. */
16260 max_mp = mp;
16261
16262 /* Scan over the preceding entries and adjust their addresses as
16263 required. */
16264 while (mp->prev != NULL
16265 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16266 {
16267 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16268 mp = mp->prev;
16269 }
16270
16271 return max_mp;
16272 }
16273
16274 static Mnode *
16275 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16276 HOST_WIDE_INT min_address)
16277 {
16278 HOST_WIDE_INT offset;
16279
16280 /* The code below assumes these are different. */
16281 gcc_assert (mp != min_mp);
16282
16283 if (min_mp == NULL)
16284 {
16285 if (min_address > mp->min_address)
16286 mp->min_address = min_address;
16287 }
16288 else
16289 {
16290 /* We will adjust this below if it is too loose. */
16291 mp->min_address = min_address;
16292
16293 /* Unlink MP from its current position. Since min_mp is non-null,
16294 mp->next must be non-null. */
16295 mp->next->prev = mp->prev;
16296 if (mp->prev != NULL)
16297 mp->prev->next = mp->next;
16298 else
16299 minipool_vector_head = mp->next;
16300
16301 /* Reinsert it after MIN_MP. */
16302 mp->prev = min_mp;
16303 mp->next = min_mp->next;
16304 min_mp->next = mp;
16305 if (mp->next != NULL)
16306 mp->next->prev = mp;
16307 else
16308 minipool_vector_tail = mp;
16309 }
16310
16311 min_mp = mp;
16312
16313 offset = 0;
16314 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16315 {
16316 mp->offset = offset;
16317 if (mp->refcount > 0)
16318 offset += mp->fix_size;
16319
16320 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16321 mp->next->min_address = mp->min_address + mp->fix_size;
16322 }
16323
16324 return min_mp;
16325 }
16326
16327 /* Add a constant to the minipool for a backward reference. Returns the
16328 node added or NULL if the constant will not fit in this pool.
16329
16330 Note that the code for insertion for a backwards reference can be
16331 somewhat confusing because the calculated offsets for each fix do
16332 not take into account the size of the pool (which is still under
16333 construction). */
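
/* Illustrative sketch, not part of the implementation: a fix at address
   0x200 whose insn has a backward pool range of 0xf8 gives
   min_address = 0x200 - 0xf8 = 0x108; the entry can only be accepted if
   0x108 is still below the address of the existing pool's barrier,
   i.e. the pool lies close enough behind the instruction.  */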
16334 static Mnode *
16335 add_minipool_backward_ref (Mfix *fix)
16336 {
16337 /* If set, min_mp is the last pool_entry that has a lower constraint
16338 than the one we are trying to add. */
16339 Mnode *min_mp = NULL;
16340 /* This can be negative, since it is only a constraint. */
16341 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16342 Mnode *mp;
16343
16344 /* If we can't reach the current pool from this insn, or if we can't
16345 insert this entry at the end of the pool without pushing other
16346 fixes out of range, then we don't try. This ensures that we
16347 can't fail later on. */
16348 if (min_address >= minipool_barrier->address
16349 || (minipool_vector_tail->min_address + fix->fix_size
16350 >= minipool_barrier->address))
16351 return NULL;
16352
16353 /* Scan the pool to see if a constant with the same value has
16354 already been added. While we are doing this, also note the
16355 location where we must insert the constant if it doesn't already
16356 exist. */
16357 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16358 {
16359 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16360 && fix->mode == mp->mode
16361 && (!LABEL_P (fix->value)
16362 || (CODE_LABEL_NUMBER (fix->value)
16363 == CODE_LABEL_NUMBER (mp->value)))
16364 && rtx_equal_p (fix->value, mp->value)
16365 /* Check that there is enough slack to move this entry to the
16366 end of the table (this is conservative). */
16367 && (mp->max_address
16368 > (minipool_barrier->address
16369 + minipool_vector_tail->offset
16370 + minipool_vector_tail->fix_size)))
16371 {
16372 mp->refcount++;
16373 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16374 }
16375
16376 if (min_mp != NULL)
16377 mp->min_address += fix->fix_size;
16378 else
16379 {
16380 /* Note the insertion point if necessary. */
16381 if (mp->min_address < min_address)
16382 {
16383 /* For now, we do not allow the insertion of nodes requiring
16384 8-byte alignment anywhere but at the start of the pool. */
16385 if (ARM_DOUBLEWORD_ALIGN
16386 && fix->fix_size >= 8 && mp->fix_size < 8)
16387 return NULL;
16388 else
16389 min_mp = mp;
16390 }
16391 else if (mp->max_address
16392 < minipool_barrier->address + mp->offset + fix->fix_size)
16393 {
16394 /* Inserting before this entry would push the fix beyond
16395 its maximum address (which can happen if we have
16396 re-located a forwards fix); force the new fix to come
16397 after it. */
16398 if (ARM_DOUBLEWORD_ALIGN
16399 && fix->fix_size >= 8 && mp->fix_size < 8)
16400 return NULL;
16401 else
16402 {
16403 min_mp = mp;
16404 min_address = mp->min_address + fix->fix_size;
16405 }
16406 }
16407 /* Do not insert a non-8-byte aligned quantity before 8-byte
16408 aligned quantities. */
16409 else if (ARM_DOUBLEWORD_ALIGN
16410 && fix->fix_size < 8
16411 && mp->fix_size >= 8)
16412 {
16413 min_mp = mp;
16414 min_address = mp->min_address + fix->fix_size;
16415 }
16416 }
16417 }
16418
16419 /* We need to create a new entry. */
16420 mp = XNEW (Mnode);
16421 mp->fix_size = fix->fix_size;
16422 mp->mode = fix->mode;
16423 mp->value = fix->value;
16424 mp->refcount = 1;
16425 mp->max_address = minipool_barrier->address + 65536;
16426
16427 mp->min_address = min_address;
16428
16429 if (min_mp == NULL)
16430 {
16431 mp->prev = NULL;
16432 mp->next = minipool_vector_head;
16433
16434 if (mp->next == NULL)
16435 {
16436 minipool_vector_tail = mp;
16437 minipool_vector_label = gen_label_rtx ();
16438 }
16439 else
16440 mp->next->prev = mp;
16441
16442 minipool_vector_head = mp;
16443 }
16444 else
16445 {
16446 mp->next = min_mp->next;
16447 mp->prev = min_mp;
16448 min_mp->next = mp;
16449
16450 if (mp->next != NULL)
16451 mp->next->prev = mp;
16452 else
16453 minipool_vector_tail = mp;
16454 }
16455
16456 /* Save the new entry. */
16457 min_mp = mp;
16458
16459 if (mp->prev)
16460 mp = mp->prev;
16461 else
16462 mp->offset = 0;
16463
16464 /* Scan over the following entries and adjust their offsets. */
16465 while (mp->next != NULL)
16466 {
16467 if (mp->next->min_address < mp->min_address + mp->fix_size)
16468 mp->next->min_address = mp->min_address + mp->fix_size;
16469
16470 if (mp->refcount)
16471 mp->next->offset = mp->offset + mp->fix_size;
16472 else
16473 mp->next->offset = mp->offset;
16474
16475 mp = mp->next;
16476 }
16477
16478 return min_mp;
16479 }
16480
16481 static void
16482 assign_minipool_offsets (Mfix *barrier)
16483 {
16484 HOST_WIDE_INT offset = 0;
16485 Mnode *mp;
16486
16487 minipool_barrier = barrier;
16488
16489 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16490 {
16491 mp->offset = offset;
16492
16493 if (mp->refcount > 0)
16494 offset += mp->fix_size;
16495 }
16496 }
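
/* Worked example (illustration only): with three referenced entries of
   sizes 4, 8 and 4 laid out in that order, their offsets from the start
   of the pool become 0, 4 and 12; an entry whose refcount has dropped to
   zero takes no space and simply inherits the running offset.  */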
16497
16498 /* Output the literal table. */
16499 static void
16500 dump_minipool (rtx_insn *scan)
16501 {
16502 Mnode * mp;
16503 Mnode * nmp;
16504 int align64 = 0;
16505
16506 if (ARM_DOUBLEWORD_ALIGN)
16507 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16508 if (mp->refcount > 0 && mp->fix_size >= 8)
16509 {
16510 align64 = 1;
16511 break;
16512 }
16513
16514 if (dump_file)
16515 fprintf (dump_file,
16516 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16517 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16518
16519 scan = emit_label_after (gen_label_rtx (), scan);
16520 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16521 scan = emit_label_after (minipool_vector_label, scan);
16522
16523 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16524 {
16525 if (mp->refcount > 0)
16526 {
16527 if (dump_file)
16528 {
16529 fprintf (dump_file,
16530 ";; Offset %u, min %ld, max %ld ",
16531 (unsigned) mp->offset, (unsigned long) mp->min_address,
16532 (unsigned long) mp->max_address);
16533 arm_print_value (dump_file, mp->value);
16534 fputc ('\n', dump_file);
16535 }
16536
16537 rtx val = copy_rtx (mp->value);
16538
16539 switch (GET_MODE_SIZE (mp->mode))
16540 {
16541 #ifdef HAVE_consttable_1
16542 case 1:
16543 scan = emit_insn_after (gen_consttable_1 (val), scan);
16544 break;
16545
16546 #endif
16547 #ifdef HAVE_consttable_2
16548 case 2:
16549 scan = emit_insn_after (gen_consttable_2 (val), scan);
16550 break;
16551
16552 #endif
16553 #ifdef HAVE_consttable_4
16554 case 4:
16555 scan = emit_insn_after (gen_consttable_4 (val), scan);
16556 break;
16557
16558 #endif
16559 #ifdef HAVE_consttable_8
16560 case 8:
16561 scan = emit_insn_after (gen_consttable_8 (val), scan);
16562 break;
16563
16564 #endif
16565 #ifdef HAVE_consttable_16
16566 case 16:
16567 scan = emit_insn_after (gen_consttable_16 (val), scan);
16568 break;
16569
16570 #endif
16571 default:
16572 gcc_unreachable ();
16573 }
16574 }
16575
16576 nmp = mp->next;
16577 free (mp);
16578 }
16579
16580 minipool_vector_head = minipool_vector_tail = NULL;
16581 scan = emit_insn_after (gen_consttable_end (), scan);
16582 scan = emit_barrier_after (scan);
16583 }
16584
16585 /* Return the cost of forcibly inserting a barrier after INSN. */
16586 static int
16587 arm_barrier_cost (rtx_insn *insn)
16588 {
16589 /* Basing the location of the pool on the loop depth is preferable,
16590 but at the moment, the basic block information seems to be
16591 corrupt by this stage of the compilation. */
16592 int base_cost = 50;
16593 rtx_insn *next = next_nonnote_insn (insn);
16594
16595 if (next != NULL && LABEL_P (next))
16596 base_cost -= 20;
16597
16598 switch (GET_CODE (insn))
16599 {
16600 case CODE_LABEL:
16601 /* It will always be better to place the table before the label, rather
16602 than after it. */
16603 return 50;
16604
16605 case INSN:
16606 case CALL_INSN:
16607 return base_cost;
16608
16609 case JUMP_INSN:
16610 return base_cost - 10;
16611
16612 default:
16613 return base_cost + 10;
16614 }
16615 }
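
/* Worked example (illustration only): forcing a barrier after an
   unconditional JUMP_INSN costs 50 - 10 = 40, or only 20 when the next
   real insn is a label (50 - 20 - 10); a plain INSN or CALL_INSN costs
   50, or 30 when followed by a label.  The caller below picks the
   position with the lowest cost.  */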
16616
16617 /* Find the best place in the insn stream in the range
16618 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16619 Create the barrier by inserting a jump and add a new fix entry for
16620 it. */
16621 static Mfix *
16622 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16623 {
16624 HOST_WIDE_INT count = 0;
16625 rtx_barrier *barrier;
16626 rtx_insn *from = fix->insn;
16627 /* The instruction after which we will insert the jump. */
16628 rtx_insn *selected = NULL;
16629 int selected_cost;
16630 /* The address at which the jump instruction will be placed. */
16631 HOST_WIDE_INT selected_address;
16632 Mfix * new_fix;
16633 HOST_WIDE_INT max_count = max_address - fix->address;
16634 rtx_code_label *label = gen_label_rtx ();
16635
16636 selected_cost = arm_barrier_cost (from);
16637 selected_address = fix->address;
16638
16639 while (from && count < max_count)
16640 {
16641 rtx_jump_table_data *tmp;
16642 int new_cost;
16643
16644 /* This code shouldn't have been called if there was a natural barrier
16645 within range. */
16646 gcc_assert (!BARRIER_P (from));
16647
16648 /* Count the length of this insn. This must stay in sync with the
16649 code that pushes minipool fixes. */
16650 if (LABEL_P (from))
16651 count += get_label_padding (from);
16652 else
16653 count += get_attr_length (from);
16654
16655 /* If there is a jump table, add its length. */
16656 if (tablejump_p (from, NULL, &tmp))
16657 {
16658 count += get_jump_table_size (tmp);
16659
16660 /* Jump tables aren't in a basic block, so base the cost on
16661 the dispatch insn. If we select this location, we will
16662 still put the pool after the table. */
16663 new_cost = arm_barrier_cost (from);
16664
16665 if (count < max_count
16666 && (!selected || new_cost <= selected_cost))
16667 {
16668 selected = tmp;
16669 selected_cost = new_cost;
16670 selected_address = fix->address + count;
16671 }
16672
16673 /* Continue after the dispatch table. */
16674 from = NEXT_INSN (tmp);
16675 continue;
16676 }
16677
16678 new_cost = arm_barrier_cost (from);
16679
16680 if (count < max_count
16681 && (!selected || new_cost <= selected_cost))
16682 {
16683 selected = from;
16684 selected_cost = new_cost;
16685 selected_address = fix->address + count;
16686 }
16687
16688 from = NEXT_INSN (from);
16689 }
16690
16691 /* Make sure that we found a place to insert the jump. */
16692 gcc_assert (selected);
16693
16694 /* Create a new JUMP_INSN that branches around a barrier. */
16695 from = emit_jump_insn_after (gen_jump (label), selected);
16696 JUMP_LABEL (from) = label;
16697 barrier = emit_barrier_after (from);
16698 emit_label_after (label, barrier);
16699
16700 /* Create a minipool barrier entry for the new barrier. */
16701 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16702 new_fix->insn = barrier;
16703 new_fix->address = selected_address;
16704 new_fix->next = fix->next;
16705 fix->next = new_fix;
16706
16707 return new_fix;
16708 }
16709
16710 /* Record that there is a natural barrier in the insn stream at
16711 ADDRESS. */
16712 static void
16713 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16714 {
16715 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16716
16717 fix->insn = insn;
16718 fix->address = address;
16719
16720 fix->next = NULL;
16721 if (minipool_fix_head != NULL)
16722 minipool_fix_tail->next = fix;
16723 else
16724 minipool_fix_head = fix;
16725
16726 minipool_fix_tail = fix;
16727 }
16728
16729 /* Record INSN, which will need fixing up to load a value from the
16730 minipool. ADDRESS is the offset of the insn since the start of the
16731 function; LOC is a pointer to the part of the insn which requires
16732 fixing; VALUE is the constant that must be loaded, which is of type
16733 MODE. */
16734 static void
16735 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16736 machine_mode mode, rtx value)
16737 {
16738 gcc_assert (!arm_disable_literal_pool);
16739 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16740
16741 fix->insn = insn;
16742 fix->address = address;
16743 fix->loc = loc;
16744 fix->mode = mode;
16745 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16746 fix->value = value;
16747 fix->forwards = get_attr_pool_range (insn);
16748 fix->backwards = get_attr_neg_pool_range (insn);
16749 fix->minipool = NULL;
16750
16751 /* If an insn doesn't have a range defined for it, then it isn't
16752 expecting to be reworked by this code. Better to stop now than
16753 to generate duff assembly code. */
16754 gcc_assert (fix->forwards || fix->backwards);
16755
16756 /* If an entry requires 8-byte alignment then assume all constant pools
16757 require 4 bytes of padding. Trying to do this later on a per-pool
16758 basis is awkward because existing pool entries have to be modified. */
16759 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16760 minipool_pad = 4;
16761
16762 if (dump_file)
16763 {
16764 fprintf (dump_file,
16765 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16766 GET_MODE_NAME (mode),
16767 INSN_UID (insn), (unsigned long) address,
16768 -1 * (long)fix->backwards, (long)fix->forwards);
16769 arm_print_value (dump_file, fix->value);
16770 fprintf (dump_file, "\n");
16771 }
16772
16773 /* Add it to the chain of fixes. */
16774 fix->next = NULL;
16775
16776 if (minipool_fix_head != NULL)
16777 minipool_fix_tail->next = fix;
16778 else
16779 minipool_fix_head = fix;
16780
16781 minipool_fix_tail = fix;
16782 }
16783
16784 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16785 Returns the number of insns needed, or 99 if we always want to synthesize
16786 the value. */
16787 int
16788 arm_max_const_double_inline_cost ()
16789 {
16790 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16791 }
16792
16793 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16794 Returns the number of insns needed, or 99 if we don't know how to
16795 do it. */
16796 int
16797 arm_const_double_inline_cost (rtx val)
16798 {
16799 rtx lowpart, highpart;
16800 machine_mode mode;
16801
16802 mode = GET_MODE (val);
16803
16804 if (mode == VOIDmode)
16805 mode = DImode;
16806
16807 gcc_assert (GET_MODE_SIZE (mode) == 8);
16808
16809 lowpart = gen_lowpart (SImode, val);
16810 highpart = gen_highpart_mode (SImode, mode, val);
16811
16812 gcc_assert (CONST_INT_P (lowpart));
16813 gcc_assert (CONST_INT_P (highpart));
16814
16815 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16816 NULL_RTX, NULL_RTX, 0, 0)
16817 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16818 NULL_RTX, NULL_RTX, 0, 0));
16819 }
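
/* Worked example (illustration only): for the DImode constant
   0x0000000100000001 both 32-bit halves are the immediate 1, which
   arm_gen_constant can produce with a single insn each, so the cost
   returned is 2; that is within the limit given by
   arm_max_const_double_inline_cost, so such a constant would normally
   be synthesized inline rather than loaded from a literal pool.  */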
16820
16821 /* Cost of loading a SImode constant. */
16822 static inline int
16823 arm_const_inline_cost (enum rtx_code code, rtx val)
16824 {
16825 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16826 NULL_RTX, NULL_RTX, 1, 0);
16827 }
16828
16829 /* Return true if it is worthwhile to split a 64-bit constant into two
16830 32-bit operations. This is the case if optimizing for size, or
16831 if we have load delay slots, or if one 32-bit part can be done with
16832 a single data operation. */
16833 bool
16834 arm_const_double_by_parts (rtx val)
16835 {
16836 machine_mode mode = GET_MODE (val);
16837 rtx part;
16838
16839 if (optimize_size || arm_ld_sched)
16840 return true;
16841
16842 if (mode == VOIDmode)
16843 mode = DImode;
16844
16845 part = gen_highpart_mode (SImode, mode, val);
16846
16847 gcc_assert (CONST_INT_P (part));
16848
16849 if (const_ok_for_arm (INTVAL (part))
16850 || const_ok_for_arm (~INTVAL (part)))
16851 return true;
16852
16853 part = gen_lowpart (SImode, val);
16854
16855 gcc_assert (CONST_INT_P (part));
16856
16857 if (const_ok_for_arm (INTVAL (part))
16858 || const_ok_for_arm (~INTVAL (part)))
16859 return true;
16860
16861 return false;
16862 }
16863
16864 /* Return true if it is possible to inline both the high and low parts
16865 of a 64-bit constant into 32-bit data processing instructions. */
16866 bool
16867 arm_const_double_by_immediates (rtx val)
16868 {
16869 machine_mode mode = GET_MODE (val);
16870 rtx part;
16871
16872 if (mode == VOIDmode)
16873 mode = DImode;
16874
16875 part = gen_highpart_mode (SImode, mode, val);
16876
16877 gcc_assert (CONST_INT_P (part));
16878
16879 if (!const_ok_for_arm (INTVAL (part)))
16880 return false;
16881
16882 part = gen_lowpart (SImode, val);
16883
16884 gcc_assert (CONST_INT_P (part));
16885
16886 if (!const_ok_for_arm (INTVAL (part)))
16887 return false;
16888
16889 return true;
16890 }
16891
16892 /* Scan INSN and note any of its operands that need fixing.
16893 If DO_PUSHES is false we do not actually push any of the fixups
16894 needed. */
16895 static void
16896 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16897 {
16898 int opno;
16899
16900 extract_constrain_insn (insn);
16901
16902 if (recog_data.n_alternatives == 0)
16903 return;
16904
16905 /* Fill in recog_op_alt with information about the constraints of
16906 this insn. */
16907 preprocess_constraints (insn);
16908
16909 const operand_alternative *op_alt = which_op_alt ();
16910 for (opno = 0; opno < recog_data.n_operands; opno++)
16911 {
16912 /* Things we need to fix can only occur in inputs. */
16913 if (recog_data.operand_type[opno] != OP_IN)
16914 continue;
16915
16916 /* If this alternative is a memory reference, then any mention
16917 of constants in this alternative is really to fool reload
16918 into allowing us to accept one there. We need to fix them up
16919 now so that we output the right code. */
16920 if (op_alt[opno].memory_ok)
16921 {
16922 rtx op = recog_data.operand[opno];
16923
16924 if (CONSTANT_P (op))
16925 {
16926 if (do_pushes)
16927 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16928 recog_data.operand_mode[opno], op);
16929 }
16930 else if (MEM_P (op)
16931 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16932 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16933 {
16934 if (do_pushes)
16935 {
16936 rtx cop = avoid_constant_pool_reference (op);
16937
16938 /* Casting the address of something to a mode narrower
16939 than a word can cause avoid_constant_pool_reference()
16940 to return the pool reference itself. That's no good to
16941 us here. Let's just hope that we can use the
16942 constant pool value directly. */
16943 if (op == cop)
16944 cop = get_pool_constant (XEXP (op, 0));
16945
16946 push_minipool_fix (insn, address,
16947 recog_data.operand_loc[opno],
16948 recog_data.operand_mode[opno], cop);
16949 }
16950
16951 }
16952 }
16953 }
16954
16955 return;
16956 }
16957
16958 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16959 and unions in the context of ARMv8-M Security Extensions. It is used as a
16960 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16961 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16962 or four masks, depending on whether it is being computed for a
16963 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16964 respectively. The tree for the type of the argument or a field within an
16965 argument is passed in ARG_TYPE, the current register this argument or field
16966 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16967 argument or field starts at is passed in STARTING_BIT and the last used bit
16968 is kept in LAST_USED_BIT which is also updated accordingly. */
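
/* Illustrative sketch with a hypothetical type, not part of the
   implementation: for an argument of type struct { char c; short s; }
   passed in r0, the char occupies bits 0-7 and the short bits 16-31, so
   bits 8-15 are padding; padding_bits_to_clear[0] ends up as 0x0000ff00
   and r0 is recorded in the returned mask as a register that must not be
   fully cleared.  */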
16969
16970 static unsigned HOST_WIDE_INT
16971 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16972 uint32_t * padding_bits_to_clear,
16973 unsigned starting_bit, int * last_used_bit)
16974
16975 {
16976 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16977
16978 if (TREE_CODE (arg_type) == RECORD_TYPE)
16979 {
16980 unsigned current_bit = starting_bit;
16981 tree field;
16982 long int offset, size;
16983
16984
16985 field = TYPE_FIELDS (arg_type);
16986 while (field)
16987 {
16988 /* The offset within a structure is always an offset from
16989 the start of that structure. Make sure we take that into account in
16990 the calculation of the register-based offset that we use here. */
16991 offset = starting_bit;
16992 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16993 offset %= 32;
16994
16995 /* This is the actual size of the field, for bitfields this is the
16996 bitfield width and not the container size. */
16997 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16998
16999 if (*last_used_bit != offset)
17000 {
17001 if (offset < *last_used_bit)
17002 {
17003 /* This field's offset is before the 'last_used_bit', which
17004 means this field goes in the next register. So we need to
17005 pad the rest of the current register and increase the
17006 register number. */
17007 uint32_t mask;
17008 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
17009 mask++;
17010
17011 padding_bits_to_clear[*regno] |= mask;
17012 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17013 (*regno)++;
17014 }
17015 else
17016 {
17017 /* Otherwise we pad the bits between the last field's end and
17018 the start of the new field. */
17019 uint32_t mask;
17020
17021 mask = ((uint32_t)-1) >> (32 - offset);
17022 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
17023 padding_bits_to_clear[*regno] |= mask;
17024 }
17025 current_bit = offset;
17026 }
17027
17028 /* Calculate further padding bits for inner structs/unions too. */
17029 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
17030 {
17031 *last_used_bit = current_bit;
17032 not_to_clear_reg_mask
17033 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17034 padding_bits_to_clear, offset,
17035 last_used_bit);
17036 }
17037 else
17038 {
17039 /* Update 'current_bit' with this field's size. If the
17040 'current_bit' lies in a subsequent register, update 'regno' and
17041 reset 'current_bit' to point to the current bit in that new
17042 register. */
17043 current_bit += size;
17044 while (current_bit >= 32)
17045 {
17046 current_bit -= 32;
17047 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17048 (*regno)++;
17049 }
17050 *last_used_bit = current_bit;
17051 }
17052
17053 field = TREE_CHAIN (field);
17054 }
17055 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17056 }
17057 else if (TREE_CODE (arg_type) == UNION_TYPE)
17058 {
17059 tree field, field_t;
17060 int i, regno_t, field_size;
17061 int max_reg = -1;
17062 int max_bit = -1;
17063 uint32_t mask;
17064 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17065 = {-1, -1, -1, -1};
17066
17067 /* To compute the padding bits in a union we only consider bits as
17068 padding bits if they are always either a padding bit or fall outside a
17069 field's size for all fields in the union. */
17070 field = TYPE_FIELDS (arg_type);
17071 while (field)
17072 {
17073 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17074 = {0U, 0U, 0U, 0U};
17075 int last_used_bit_t = *last_used_bit;
17076 regno_t = *regno;
17077 field_t = TREE_TYPE (field);
17078
17079 /* If the field's type is either a record or a union make sure to
17080 compute their padding bits too. */
17081 if (RECORD_OR_UNION_TYPE_P (field_t))
17082 not_to_clear_reg_mask
17083 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17084 &padding_bits_to_clear_t[0],
17085 starting_bit, &last_used_bit_t);
17086 else
17087 {
17088 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17089 regno_t = (field_size / 32) + *regno;
17090 last_used_bit_t = (starting_bit + field_size) % 32;
17091 }
17092
17093 for (i = *regno; i < regno_t; i++)
17094 {
17095 /* For all but the last register used by this field only keep the
17096 padding bits that were padding bits in this field. */
17097 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17098 }
17099
17100 /* For the last register, keep all padding bits that were padding
17101 bits in this field and any padding bits that are still valid
17102 as padding bits but fall outside of this field's size. */
17103 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17104 padding_bits_to_clear_res[regno_t]
17105 &= padding_bits_to_clear_t[regno_t] | mask;
17106
17107 /* Update the maximum size of the fields in terms of registers used
17108 ('max_reg') and the 'last_used_bit' in said register. */
17109 if (max_reg < regno_t)
17110 {
17111 max_reg = regno_t;
17112 max_bit = last_used_bit_t;
17113 }
17114 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17115 max_bit = last_used_bit_t;
17116
17117 field = TREE_CHAIN (field);
17118 }
17119
17120 /* Update the current padding_bits_to_clear using the intersection of the
17121 padding bits of all the fields. */
17122 for (i = *regno; i < max_reg; i++)
17123 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17124
17125 /* Do not keep trailing padding bits; we do not know yet whether this
17126 is the end of the argument. */
17127 mask = ((uint32_t) 1 << max_bit) - 1;
17128 padding_bits_to_clear[max_reg]
17129 |= padding_bits_to_clear_res[max_reg] & mask;
17130
17131 *regno = max_reg;
17132 *last_used_bit = max_bit;
17133 }
17134 else
17135 /* This function should only be used for structs and unions. */
17136 gcc_unreachable ();
17137
17138 return not_to_clear_reg_mask;
17139 }
17140
17141 /* In the context of ARMv8-M Security Extensions, this function is used for both
17142 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17143 registers are used when returning or passing arguments, which is then
17144 returned as a mask. It will also compute a mask to indicate padding/unused
17145 bits for each of these registers, and passes this through the
17146 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17147 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17148 the starting register used to pass this argument or return value is passed
17149 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17150 for struct and union types. */
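
/* Illustrative sketch, not part of the implementation: a double argument
   passed in d0 under -mfloat-abi=hard has ARM_NUM_REGS equal to 2, so
   the returned mask covers s0 and s1 and neither register is scrubbed
   before the call; a plain int argument in r0 only marks r0.  */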
17151
17152 static unsigned HOST_WIDE_INT
17153 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17154 uint32_t * padding_bits_to_clear)
17155
17156 {
17157 int last_used_bit = 0;
17158 unsigned HOST_WIDE_INT not_to_clear_mask;
17159
17160 if (RECORD_OR_UNION_TYPE_P (arg_type))
17161 {
17162 not_to_clear_mask
17163 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17164 padding_bits_to_clear, 0,
17165 &last_used_bit);
17166
17167
17168 /* If the 'last_used_bit' is not zero, that means we are still using a
17169 part of the last 'regno'. In such cases we must clear the trailing
17170 bits. Otherwise we are not using regno and we should mark it
17171 for clearing. */
17172 if (last_used_bit != 0)
17173 padding_bits_to_clear[regno]
17174 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17175 else
17176 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17177 }
17178 else
17179 {
17180 not_to_clear_mask = 0;
17181 /* We are not dealing with structs or unions, so these arguments may be
17182 passed in floating point registers too. In some cases a BLKmode is
17183 used when returning or passing arguments in multiple VFP registers. */
17184 if (GET_MODE (arg_rtx) == BLKmode)
17185 {
17186 int i, arg_regs;
17187 rtx reg;
17188
17189 /* This should really only occur when dealing with the hard-float
17190 ABI. */
17191 gcc_assert (TARGET_HARD_FLOAT_ABI);
17192
17193 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17194 {
17195 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17196 gcc_assert (REG_P (reg));
17197
17198 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17199
17200 /* If we are dealing with DF mode, make sure we don't
17201 clear either of the registers it addresses. */
17202 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17203 if (arg_regs > 1)
17204 {
17205 unsigned HOST_WIDE_INT mask;
17206 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17207 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17208 not_to_clear_mask |= mask;
17209 }
17210 }
17211 }
17212 else
17213 {
17214 /* Otherwise we can rely on the MODE to determine how many registers
17215 are being used by this argument. */
17216 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17217 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17218 if (arg_regs > 1)
17219 {
17220 unsigned HOST_WIDE_INT
17221 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17222 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17223 not_to_clear_mask |= mask;
17224 }
17225 }
17226 }
17227
17228 return not_to_clear_mask;
17229 }
17230
17231 /* Clear secrets from registers before doing a cmse_nonsecure_call or returning from
17232 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17233 are to be fully cleared, using the value in register CLEARING_REG if more
17234 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17235 the bits that need to be cleared in caller-saved core registers, with
17236 SCRATCH_REG used as a scratch register for that clearing.
17237
17238 NOTE: one of the three following conditions must hold:
17239 - SCRATCH_REG is a low register
17240 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17241 in TO_CLEAR_BITMAP)
17242 - CLEARING_REG is a low register. */
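
/* Illustrative sketch of the padding-bit clearing below (the register is
   hypothetical): if padding_bits_to_clear[0] is 0x0000ff00, the inverse
   mask is built in the scratch register in two halves (0x00ff by a move,
   0xffff into the top 16 bits via a zero_extract) and an
   "ands r0, r0, scratch" is emitted, so only the padding bits of r0 are
   zeroed while the argument bits are preserved.  */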
17243
17244 static void
17245 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17246 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17247 {
17248 bool saved_clearing = false;
17249 rtx saved_clearing_reg = NULL_RTX;
17250 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17251
17252 gcc_assert (arm_arch_cmse);
17253
17254 if (!bitmap_empty_p (to_clear_bitmap))
17255 {
17256 minregno = bitmap_first_set_bit (to_clear_bitmap);
17257 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17258 }
17259 clearing_regno = REGNO (clearing_reg);
17260
17261 /* Clear padding bits. */
17262 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17263 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17264 {
17265 uint64_t mask;
17266 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17267
17268 if (padding_bits_to_clear[i] == 0)
17269 continue;
17270
17271 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17272 CLEARING_REG as scratch. */
17273 if (TARGET_THUMB1
17274 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17275 {
17276 /* clearing_reg is not to be cleared, so copy its value into scratch_reg
17277 such that we can use clearing_reg to clear the unused bits in the
17278 arguments. */
17279 if ((clearing_regno > maxregno
17280 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17281 && !saved_clearing)
17282 {
17283 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17284 emit_move_insn (scratch_reg, clearing_reg);
17285 saved_clearing = true;
17286 saved_clearing_reg = scratch_reg;
17287 }
17288 scratch_reg = clearing_reg;
17289 }
17290
17291 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17292 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17293 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17294
17295 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17296 mask = (~padding_bits_to_clear[i]) >> 16;
17297 rtx16 = gen_int_mode (16, SImode);
17298 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17299 if (mask)
17300 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17301
17302 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17303 }
17304 if (saved_clearing)
17305 emit_move_insn (clearing_reg, saved_clearing_reg);
17306
17307
17308 /* Clear full registers. */
17309
17310 /* If not marked for clearing, clearing_reg already does not contain
17311 any secret. */
17312 if (clearing_regno <= maxregno
17313 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17314 {
17315 emit_move_insn (clearing_reg, const0_rtx);
17316 emit_use (clearing_reg);
17317 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17318 }
17319
17320 for (regno = minregno; regno <= maxregno; regno++)
17321 {
17322 if (!bitmap_bit_p (to_clear_bitmap, regno))
17323 continue;
17324
17325 if (IS_VFP_REGNUM (regno))
17326 {
17327 /* If regno is an even vfp register and its successor is also to
17328 be cleared, use vmov. */
17329 if (TARGET_VFP_DOUBLE
17330 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17331 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17332 {
17333 emit_move_insn (gen_rtx_REG (DFmode, regno),
17334 CONST1_RTX (DFmode));
17335 emit_use (gen_rtx_REG (DFmode, regno));
17336 regno++;
17337 }
17338 else
17339 {
17340 emit_move_insn (gen_rtx_REG (SFmode, regno),
17341 CONST1_RTX (SFmode));
17342 emit_use (gen_rtx_REG (SFmode, regno));
17343 }
17344 }
17345 else
17346 {
17347 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17348 emit_use (gen_rtx_REG (SImode, regno));
17349 }
17350 }
17351 }
17352
17353 /* Clears caller-saved registers not used to pass arguments before a
17354 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17355 registers is done in the __gnu_cmse_nonsecure_call libcall.
17356 See libgcc/config/arm/cmse_nonsecure_call.S. */
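
/* Illustrative sketch for a hypothetical call, not part of the
   implementation: for a cmse_nonsecure_call of type "int f (int)" with
   -mfloat-abi=hard, r0 carries the argument and is left alone, r1-r3 are
   overwritten with the (non-secret) value of the clearing register, the
   caller-saved VFP registers s0-s15 are scrubbed, and the register
   holding the function address only has its LSB cleared by the shift
   pair emitted below.  */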
17357
17358 static void
17359 cmse_nonsecure_call_clear_caller_saved (void)
17360 {
17361 basic_block bb;
17362
17363 FOR_EACH_BB_FN (bb, cfun)
17364 {
17365 rtx_insn *insn;
17366
17367 FOR_BB_INSNS (bb, insn)
17368 {
17369 unsigned address_regnum, regno, maxregno =
17370 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17371 auto_sbitmap to_clear_bitmap (maxregno + 1);
17372 rtx_insn *seq;
17373 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17374 rtx address;
17375 CUMULATIVE_ARGS args_so_far_v;
17376 cumulative_args_t args_so_far;
17377 tree arg_type, fntype;
17378 bool first_param = true;
17379 function_args_iterator args_iter;
17380 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17381
17382 if (!NONDEBUG_INSN_P (insn))
17383 continue;
17384
17385 if (!CALL_P (insn))
17386 continue;
17387
17388 pat = PATTERN (insn);
17389 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17390 call = XVECEXP (pat, 0, 0);
17391
17392 /* Get the real call RTX if the insn sets a value, ie. returns. */
17393 if (GET_CODE (call) == SET)
17394 call = SET_SRC (call);
17395
17396 /* Check if it is a cmse_nonsecure_call. */
17397 unspec = XEXP (call, 0);
17398 if (GET_CODE (unspec) != UNSPEC
17399 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17400 continue;
17401
17402 /* Determine the caller-saved registers we need to clear. */
17403 bitmap_clear (to_clear_bitmap);
17404 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17405
17406 /* Only look at the caller-saved floating point registers in case of
17407 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17408 lazy store and loads which clear both caller- and callee-saved
17409 registers. */
17410 if (TARGET_HARD_FLOAT_ABI)
17411 {
17412 auto_sbitmap float_bitmap (maxregno + 1);
17413
17414 bitmap_clear (float_bitmap);
17415 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17416 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17417 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17418 }
17419
17420 /* Make sure the register used to hold the function address is not
17421 cleared. */
17422 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17423 gcc_assert (MEM_P (address));
17424 gcc_assert (REG_P (XEXP (address, 0)));
17425 address_regnum = REGNO (XEXP (address, 0));
17426 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17427 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17428
17429 /* Set basic block of call insn so that df rescan is performed on
17430 insns inserted here. */
17431 set_block_for_insn (insn, bb);
17432 df_set_flags (DF_DEFER_INSN_RESCAN);
17433 start_sequence ();
17434
17435 /* Make sure the scheduler doesn't schedule other insns beyond
17436 here. */
17437 emit_insn (gen_blockage ());
17438
17439 /* Walk through all arguments and clear registers appropriately. */
17441 fntype = TREE_TYPE (MEM_EXPR (address));
17442 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17443 NULL_TREE);
17444 args_so_far = pack_cumulative_args (&args_so_far_v);
17445 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17446 {
17447 rtx arg_rtx;
17448 uint64_t to_clear_args_mask;
17449 machine_mode arg_mode = TYPE_MODE (arg_type);
17450
17451 if (VOID_TYPE_P (arg_type))
17452 continue;
17453
17454 if (!first_param)
17455 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17456 true);
17457
17458 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17459 true);
17460 gcc_assert (REG_P (arg_rtx));
17461 to_clear_args_mask
17462 = compute_not_to_clear_mask (arg_type, arg_rtx,
17463 REGNO (arg_rtx),
17464 &padding_bits_to_clear[0]);
17465 if (to_clear_args_mask)
17466 {
17467 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17468 {
17469 if (to_clear_args_mask & (1ULL << regno))
17470 bitmap_clear_bit (to_clear_bitmap, regno);
17471 }
17472 }
17473
17474 first_param = false;
17475 }
17476
17477 /* We use right shift and left shift to clear the LSB of the address
17478 we jump to instead of using bic, to avoid having to use an extra
17479 register on Thumb-1. */
17480 clearing_reg = XEXP (address, 0);
17481 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17482 emit_insn (gen_rtx_SET (clearing_reg, shift));
17483 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17484 emit_insn (gen_rtx_SET (clearing_reg, shift));
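
/* Illustration (hypothetical register): for an address held in r4 this
   emits "lsrs r4, r4, #1" followed by "lsls r4, r4, #1", which zeroes
   bit 0 without needing an extra register for a mask.  */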
17485
17486 /* Clear caller-saved registers that leak before doing a non-secure
17487 call. */
17488 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17489 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17490 NUM_ARG_REGS, ip_reg, clearing_reg);
17491
17492 seq = get_insns ();
17493 end_sequence ();
17494 emit_insn_before (seq, insn);
17495 }
17496 }
17497 }
17498
17499 /* Rewrite move insn into subtract of 0 if the condition codes will
17500 be useful in next conditional jump insn. */
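
/* Illustrative sketch with hypothetical registers: in a sequence such as
   "movs r1, r2; ...; cmp r1, #0; beq .L1" the move is rewritten as
   "subs r1, r2, #0", which sets the condition codes from the moved
   value, so the later conditional branch can test those flags and the
   separate compare can be omitted when the cbranch is output.  */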
17501
17502 static void
17503 thumb1_reorg (void)
17504 {
17505 basic_block bb;
17506
17507 FOR_EACH_BB_FN (bb, cfun)
17508 {
17509 rtx dest, src;
17510 rtx cmp, op0, op1, set = NULL;
17511 rtx_insn *prev, *insn = BB_END (bb);
17512 bool insn_clobbered = false;
17513
17514 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17515 insn = PREV_INSN (insn);
17516
17517 /* Find the last cbranchsi4_insn in basic block BB. */
17518 if (insn == BB_HEAD (bb)
17519 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17520 continue;
17521
17522 /* Get the register with which we are comparing. */
17523 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17524 op0 = XEXP (cmp, 0);
17525 op1 = XEXP (cmp, 1);
17526
17527 /* Check that comparison is against ZERO. */
17528 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17529 continue;
17530
17531 /* Find the first flag setting insn before INSN in basic block BB. */
17532 gcc_assert (insn != BB_HEAD (bb));
17533 for (prev = PREV_INSN (insn);
17534 (!insn_clobbered
17535 && prev != BB_HEAD (bb)
17536 && (NOTE_P (prev)
17537 || DEBUG_INSN_P (prev)
17538 || ((set = single_set (prev)) != NULL
17539 && get_attr_conds (prev) == CONDS_NOCOND)));
17540 prev = PREV_INSN (prev))
17541 {
17542 if (reg_set_p (op0, prev))
17543 insn_clobbered = true;
17544 }
17545
17546 /* Skip if op0 is clobbered by an insn other than prev. */
17547 if (insn_clobbered)
17548 continue;
17549
17550 if (!set)
17551 continue;
17552
17553 dest = SET_DEST (set);
17554 src = SET_SRC (set);
17555 if (!low_register_operand (dest, SImode)
17556 || !low_register_operand (src, SImode))
17557 continue;
17558
17559 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17560 in INSN. Both src and dest of the move insn are checked. */
17561 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17562 {
17563 dest = copy_rtx (dest);
17564 src = copy_rtx (src);
17565 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17566 PATTERN (prev) = gen_rtx_SET (dest, src);
17567 INSN_CODE (prev) = -1;
17568 /* Set test register in INSN to dest. */
17569 XEXP (cmp, 0) = copy_rtx (dest);
17570 INSN_CODE (insn) = -1;
17571 }
17572 }
17573 }
17574
17575 /* Convert instructions to their cc-clobbering variant if possible, since
17576 that allows us to use smaller encodings. */
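
/* Illustrative sketch with hypothetical registers: when the condition
   codes are dead, "add r0, r1, r2" (a 32-bit encoding) can become
   "adds r0, r1, r2", which has a 16-bit encoding; the loop below does
   this by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM so
   that the flag-setting variant of the pattern is matched.  */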
17577
17578 static void
17579 thumb2_reorg (void)
17580 {
17581 basic_block bb;
17582 regset_head live;
17583
17584 INIT_REG_SET (&live);
17585
17586 /* We are freeing block_for_insn in the toplev to keep compatibility
17587 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17588 compute_bb_for_insn ();
17589 df_analyze ();
17590
17591 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17592
17593 FOR_EACH_BB_FN (bb, cfun)
17594 {
17595 if ((current_tune->disparage_flag_setting_t16_encodings
17596 == tune_params::DISPARAGE_FLAGS_ALL)
17597 && optimize_bb_for_speed_p (bb))
17598 continue;
17599
17600 rtx_insn *insn;
17601 Convert_Action action = SKIP;
17602 Convert_Action action_for_partial_flag_setting
17603 = ((current_tune->disparage_flag_setting_t16_encodings
17604 != tune_params::DISPARAGE_FLAGS_NEITHER)
17605 && optimize_bb_for_speed_p (bb))
17606 ? SKIP : CONV;
17607
17608 COPY_REG_SET (&live, DF_LR_OUT (bb));
17609 df_simulate_initialize_backwards (bb, &live);
17610 FOR_BB_INSNS_REVERSE (bb, insn)
17611 {
17612 if (NONJUMP_INSN_P (insn)
17613 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17614 && GET_CODE (PATTERN (insn)) == SET)
17615 {
17616 action = SKIP;
17617 rtx pat = PATTERN (insn);
17618 rtx dst = XEXP (pat, 0);
17619 rtx src = XEXP (pat, 1);
17620 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17621
17622 if (UNARY_P (src) || BINARY_P (src))
17623 op0 = XEXP (src, 0);
17624
17625 if (BINARY_P (src))
17626 op1 = XEXP (src, 1);
17627
17628 if (low_register_operand (dst, SImode))
17629 {
17630 switch (GET_CODE (src))
17631 {
17632 case PLUS:
17633 /* Adding two registers and storing the result
17634 in the first source is already a 16-bit
17635 operation. */
17636 if (rtx_equal_p (dst, op0)
17637 && register_operand (op1, SImode))
17638 break;
17639
17640 if (low_register_operand (op0, SImode))
17641 {
17642 /* ADDS <Rd>,<Rn>,<Rm> */
17643 if (low_register_operand (op1, SImode))
17644 action = CONV;
17645 /* ADDS <Rdn>,#<imm8> */
17646 /* SUBS <Rdn>,#<imm8> */
17647 else if (rtx_equal_p (dst, op0)
17648 && CONST_INT_P (op1)
17649 && IN_RANGE (INTVAL (op1), -255, 255))
17650 action = CONV;
17651 /* ADDS <Rd>,<Rn>,#<imm3> */
17652 /* SUBS <Rd>,<Rn>,#<imm3> */
17653 else if (CONST_INT_P (op1)
17654 && IN_RANGE (INTVAL (op1), -7, 7))
17655 action = CONV;
17656 }
17657 /* ADCS <Rd>, <Rn> */
17658 else if (GET_CODE (XEXP (src, 0)) == PLUS
17659 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17660 && low_register_operand (XEXP (XEXP (src, 0), 1),
17661 SImode)
17662 && COMPARISON_P (op1)
17663 && cc_register (XEXP (op1, 0), VOIDmode)
17664 && maybe_get_arm_condition_code (op1) == ARM_CS
17665 && XEXP (op1, 1) == const0_rtx)
17666 action = CONV;
17667 break;
17668
17669 case MINUS:
17670 /* RSBS <Rd>,<Rn>,#0
17671 Not handled here: see NEG below. */
17672 /* SUBS <Rd>,<Rn>,#<imm3>
17673 SUBS <Rdn>,#<imm8>
17674 Not handled here: see PLUS above. */
17675 /* SUBS <Rd>,<Rn>,<Rm> */
17676 if (low_register_operand (op0, SImode)
17677 && low_register_operand (op1, SImode))
17678 action = CONV;
17679 break;
17680
17681 case MULT:
17682 /* MULS <Rdm>,<Rn>,<Rdm>
17683 As an exception to the rule, this is only used
17684 when optimizing for size since MULS is slow on all
17685 known implementations. We do not even want to use
17686 MULS in cold code, if optimizing for speed, so we
17687 test the global flag here. */
17688 if (!optimize_size)
17689 break;
17690 /* Fall through. */
17691 case AND:
17692 case IOR:
17693 case XOR:
17694 /* ANDS <Rdn>,<Rm> */
17695 if (rtx_equal_p (dst, op0)
17696 && low_register_operand (op1, SImode))
17697 action = action_for_partial_flag_setting;
17698 else if (rtx_equal_p (dst, op1)
17699 && low_register_operand (op0, SImode))
17700 action = action_for_partial_flag_setting == SKIP
17701 ? SKIP : SWAP_CONV;
17702 break;
17703
17704 case ASHIFTRT:
17705 case ASHIFT:
17706 case LSHIFTRT:
17707 /* ASRS <Rdn>,<Rm> */
17708 /* LSRS <Rdn>,<Rm> */
17709 /* LSLS <Rdn>,<Rm> */
17710 if (rtx_equal_p (dst, op0)
17711 && low_register_operand (op1, SImode))
17712 action = action_for_partial_flag_setting;
17713 /* ASRS <Rd>,<Rm>,#<imm5> */
17714 /* LSRS <Rd>,<Rm>,#<imm5> */
17715 /* LSLS <Rd>,<Rm>,#<imm5> */
17716 else if (low_register_operand (op0, SImode)
17717 && CONST_INT_P (op1)
17718 && IN_RANGE (INTVAL (op1), 0, 31))
17719 action = action_for_partial_flag_setting;
17720 break;
17721
17722 case ROTATERT:
17723 /* RORS <Rdn>,<Rm> */
17724 if (rtx_equal_p (dst, op0)
17725 && low_register_operand (op1, SImode))
17726 action = action_for_partial_flag_setting;
17727 break;
17728
17729 case NOT:
17730 /* MVNS <Rd>,<Rm> */
17731 if (low_register_operand (op0, SImode))
17732 action = action_for_partial_flag_setting;
17733 break;
17734
17735 case NEG:
17736 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17737 if (low_register_operand (op0, SImode))
17738 action = CONV;
17739 break;
17740
17741 case CONST_INT:
17742 /* MOVS <Rd>,#<imm8> */
17743 if (CONST_INT_P (src)
17744 && IN_RANGE (INTVAL (src), 0, 255))
17745 action = action_for_partial_flag_setting;
17746 break;
17747
17748 case REG:
17749 /* MOVS and MOV<c> with registers have different
17750 encodings, so are not relevant here. */
17751 break;
17752
17753 default:
17754 break;
17755 }
17756 }
17757
17758 if (action != SKIP)
17759 {
17760 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17761 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17762 rtvec vec;
17763
17764 if (action == SWAP_CONV)
17765 {
17766 src = copy_rtx (src);
17767 XEXP (src, 0) = op1;
17768 XEXP (src, 1) = op0;
17769 pat = gen_rtx_SET (dst, src);
17770 vec = gen_rtvec (2, pat, clobber);
17771 }
17772 else /* action == CONV */
17773 vec = gen_rtvec (2, pat, clobber);
17774
17775 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17776 INSN_CODE (insn) = -1;
17777 }
17778 }
17779
17780 if (NONDEBUG_INSN_P (insn))
17781 df_simulate_one_insn_backwards (bb, insn, &live);
17782 }
17783 }
17784
17785 CLEAR_REG_SET (&live);
17786 }
17787
17788 /* Gcc puts the pool in the wrong place for ARM, since we can only
17789 load addresses a limited distance around the pc. We do some
17790 special munging to move the constant pool values to the correct
17791 point in the code. */
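/* A rough sketch of the pass, inferred from the code below: after the
   Thumb-specific reorg passes run, every insn is scanned and each
   constant that cannot be handled inline is recorded as a "fix"; the
   fixes are grouped into minipools dumped after a suitable barrier (an
   existing one, or one created by jumping around the pool), and each
   fixed-up insn is finally rewritten to load its constant from the pool
   through a label-relative address.  */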
17792 static void
17793 arm_reorg (void)
17794 {
17795 rtx_insn *insn;
17796 HOST_WIDE_INT address = 0;
17797 Mfix * fix;
17798
17799 if (use_cmse)
17800 cmse_nonsecure_call_clear_caller_saved ();
17801
17802 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17803 if (cfun->is_thunk)
17804 ;
17805 else if (TARGET_THUMB1)
17806 thumb1_reorg ();
17807 else if (TARGET_THUMB2)
17808 thumb2_reorg ();
17809
17810 /* Ensure all insns that must be split have been split at this point.
17811 Otherwise, the pool placement code below may compute incorrect
17812 insn lengths. Note that when optimizing, all insns have already
17813 been split at this point. */
17814 if (!optimize)
17815 split_all_insns_noflow ();
17816
17817 /* Literal pools have been disabled, so none should be needed; return
17818 early to make sure we never attempt to create one. */
17819 if (arm_disable_literal_pool)
17820 return;
17821
17822 minipool_fix_head = minipool_fix_tail = NULL;
17823
17824 /* The first insn must always be a note, or the code below won't
17825 scan it properly. */
17826 insn = get_insns ();
17827 gcc_assert (NOTE_P (insn));
17828 minipool_pad = 0;
17829
17830 /* Scan all the insns and record the operands that will need fixing. */
17831 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17832 {
17833 if (BARRIER_P (insn))
17834 push_minipool_barrier (insn, address);
17835 else if (INSN_P (insn))
17836 {
17837 rtx_jump_table_data *table;
17838
17839 note_invalid_constants (insn, address, true);
17840 address += get_attr_length (insn);
17841
17842 /* If the insn is a vector jump, add the size of the table
17843 and skip the table. */
17844 if (tablejump_p (insn, NULL, &table))
17845 {
17846 address += get_jump_table_size (table);
17847 insn = table;
17848 }
17849 }
17850 else if (LABEL_P (insn))
17851 /* Add the worst-case padding due to alignment. We don't add
17852 the _current_ padding because the minipool insertions
17853 themselves might change it. */
17854 address += get_label_padding (insn);
17855 }
17856
17857 fix = minipool_fix_head;
17858
17859 /* Now scan the fixups and perform the required changes. */
17860 while (fix)
17861 {
17862 Mfix * ftmp;
17863 Mfix * fdel;
17864 Mfix * last_added_fix;
17865 Mfix * last_barrier = NULL;
17866 Mfix * this_fix;
17867
17868 /* Skip any further barriers before the next fix. */
17869 while (fix && BARRIER_P (fix->insn))
17870 fix = fix->next;
17871
17872 /* No more fixes. */
17873 if (fix == NULL)
17874 break;
17875
17876 last_added_fix = NULL;
17877
17878 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17879 {
17880 if (BARRIER_P (ftmp->insn))
17881 {
17882 if (ftmp->address >= minipool_vector_head->max_address)
17883 break;
17884
17885 last_barrier = ftmp;
17886 }
17887 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17888 break;
17889
17890 last_added_fix = ftmp; /* Keep track of the last fix added. */
17891 }
17892
17893 /* If we found a barrier, drop back to that; any fixes that we
17894 could have reached but come after the barrier will now go in
17895 the next mini-pool. */
17896 if (last_barrier != NULL)
17897 {
17898 /* Reduce the refcount for those fixes that won't go into this
17899 pool after all. */
17900 for (fdel = last_barrier->next;
17901 fdel && fdel != ftmp;
17902 fdel = fdel->next)
17903 {
17904 fdel->minipool->refcount--;
17905 fdel->minipool = NULL;
17906 }
17907
17908 ftmp = last_barrier;
17909 }
17910 else
17911 {
17912 /* ftmp is the first fix that we can't fit into this pool and
17913 there are no natural barriers that we could use. Insert a
17914 new barrier in the code somewhere between the previous
17915 fix and this one, and arrange to jump around it. */
17916 HOST_WIDE_INT max_address;
17917
17918 /* The last item on the list of fixes must be a barrier, so
17919 we can never run off the end of the list of fixes without
17920 last_barrier being set. */
17921 gcc_assert (ftmp);
17922
17923 max_address = minipool_vector_head->max_address;
17924 /* Check that there isn't another fix that is in range that
17925 we couldn't fit into this pool because the pool was
17926 already too large: we need to put the pool before such an
17927 instruction. The pool itself may come just after the
17928 fix because create_fix_barrier also allows space for a
17929 jump instruction. */
17930 if (ftmp->address < max_address)
17931 max_address = ftmp->address + 1;
17932
17933 last_barrier = create_fix_barrier (last_added_fix, max_address);
17934 }
17935
17936 assign_minipool_offsets (last_barrier);
17937
17938 while (ftmp)
17939 {
17940 if (!BARRIER_P (ftmp->insn)
17941 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17942 == NULL))
17943 break;
17944
17945 ftmp = ftmp->next;
17946 }
17947
17948 /* Scan over the fixes we have identified for this pool, fixing them
17949 up and adding the constants to the pool itself. */
17950 for (this_fix = fix; this_fix && ftmp != this_fix;
17951 this_fix = this_fix->next)
17952 if (!BARRIER_P (this_fix->insn))
17953 {
17954 rtx addr
17955 = plus_constant (Pmode,
17956 gen_rtx_LABEL_REF (VOIDmode,
17957 minipool_vector_label),
17958 this_fix->minipool->offset);
17959 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17960 }
17961
17962 dump_minipool (last_barrier->insn);
17963 fix = ftmp;
17964 }
17965
17966 /* From now on we must synthesize any constants that we can't handle
17967 directly. This can happen if the RTL gets split during final
17968 instruction generation. */
17969 cfun->machine->after_arm_reorg = 1;
17970
17971 /* Free the minipool memory. */
17972 obstack_free (&minipool_obstack, minipool_startobj);
17973 }
17974 \f
17975 /* Routines to output assembly language. */
17976
17977 /* Return string representation of passed in real value. */
17978 static const char *
17979 fp_const_from_val (REAL_VALUE_TYPE *r)
17980 {
17981 if (!fp_consts_inited)
17982 init_fp_table ();
17983
17984 gcc_assert (real_equal (r, &value_fp0));
17985 return "0";
17986 }
17987
17988 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17989 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17990 is in the list, UPDATE is true iff the list contains explicit
17991 update of base register. */
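/* For illustration (cases simplified): when the base is SP, there is an
   explicit update and this is not an interrupt return, the output is a
   POP, roughly "pop\t{r4, r5, pc}"; otherwise an LDMFD (SP base) or
   LDMIA/LDM form is used, with "^" appended when returning from an
   interrupt.  */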
17992 void
17993 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17994 bool update)
17995 {
17996 int i;
17997 char pattern[100];
17998 int offset;
17999 const char *conditional;
18000 int num_saves = XVECLEN (operands[0], 0);
18001 unsigned int regno;
18002 unsigned int regno_base = REGNO (operands[1]);
18003 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
18004
18005 offset = 0;
18006 offset += update ? 1 : 0;
18007 offset += return_pc ? 1 : 0;
18008
18009 /* Is the base register in the list? */
18010 for (i = offset; i < num_saves; i++)
18011 {
18012 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
18013 /* If SP is in the list, then the base register must be SP. */
18014 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
18015 /* If base register is in the list, there must be no explicit update. */
18016 if (regno == regno_base)
18017 gcc_assert (!update);
18018 }
18019
18020 conditional = reverse ? "%?%D0" : "%?%d0";
18021 /* Can't use POP if returning from an interrupt. */
18022 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
18023 sprintf (pattern, "pop%s\t{", conditional);
18024 else
18025 {
18026 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18027 It's just a convention; their semantics are identical. */
18028 if (regno_base == SP_REGNUM)
18029 sprintf (pattern, "ldmfd%s\t", conditional);
18030 else if (update)
18031 sprintf (pattern, "ldmia%s\t", conditional);
18032 else
18033 sprintf (pattern, "ldm%s\t", conditional);
18034
18035 strcat (pattern, reg_names[regno_base]);
18036 if (update)
18037 strcat (pattern, "!, {");
18038 else
18039 strcat (pattern, ", {");
18040 }
18041
18042 /* Output the first destination register. */
18043 strcat (pattern,
18044 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18045
18046 /* Output the rest of the destination registers. */
18047 for (i = offset + 1; i < num_saves; i++)
18048 {
18049 strcat (pattern, ", ");
18050 strcat (pattern,
18051 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18052 }
18053
18054 strcat (pattern, "}");
18055
18056 if (interrupt_p && return_pc)
18057 strcat (pattern, "^");
18058
18059 output_asm_insn (pattern, &cond);
18060 }
18061
18062
18063 /* Output the assembly for a VFP double-precision store multiple (vpush or vstmdb). */
18064
18065 const char *
18066 vfp_output_vstmd (rtx * operands)
18067 {
18068 char pattern[100];
18069 int p;
18070 int base;
18071 int i;
18072 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18073 ? XEXP (operands[0], 0)
18074 : XEXP (XEXP (operands[0], 0), 0);
18075 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18076
18077 if (push_p)
18078 strcpy (pattern, "vpush%?.64\t{%P1");
18079 else
18080 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18081
18082 p = strlen (pattern);
18083
18084 gcc_assert (REG_P (operands[1]));
18085
18086 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18087 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18088 {
18089 p += sprintf (&pattern[p], ", d%d", base + i);
18090 }
18091 strcpy (&pattern[p], "}");
18092
18093 output_asm_insn (pattern, operands);
18094 return "";
18095 }
18096
18097
18098 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
18099 number of bytes pushed. */
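/* Illustrative example (register choice assumed): saving four D
   registers starting at d8 emits a single store-multiple that
   pre-decrements SP by 4 * 8 = 32 bytes, plus a REG_FRAME_RELATED_EXPR
   note describing the SP adjustment and the individual register saves
   for the unwinder; the function then returns 32.  */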
18100
18101 static int
18102 vfp_emit_fstmd (int base_reg, int count)
18103 {
18104 rtx par;
18105 rtx dwarf;
18106 rtx tmp, reg;
18107 int i;
18108
18109 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
18110 register pairs are stored by a store multiple insn. We avoid this
18111 by pushing an extra pair. */
18112 if (count == 2 && !arm_arch6)
18113 {
18114 if (base_reg == LAST_VFP_REGNUM - 3)
18115 base_reg -= 2;
18116 count++;
18117 }
18118
18119 /* FSTMD may not store more than 16 doubleword registers at once. Split
18120 larger stores into multiple parts (up to a maximum of two, in
18121 practice). */
18122 if (count > 16)
18123 {
18124 int saved;
18125 /* NOTE: base_reg is an internal register number, so each D register
18126 counts as 2. */
18127 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18128 saved += vfp_emit_fstmd (base_reg, 16);
18129 return saved;
18130 }
18131
18132 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18133 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18134
18135 reg = gen_rtx_REG (DFmode, base_reg);
18136 base_reg += 2;
18137
18138 XVECEXP (par, 0, 0)
18139 = gen_rtx_SET (gen_frame_mem
18140 (BLKmode,
18141 gen_rtx_PRE_MODIFY (Pmode,
18142 stack_pointer_rtx,
18143 plus_constant
18144 (Pmode, stack_pointer_rtx,
18145 - (count * 8)))
18146 ),
18147 gen_rtx_UNSPEC (BLKmode,
18148 gen_rtvec (1, reg),
18149 UNSPEC_PUSH_MULT));
18150
18151 tmp = gen_rtx_SET (stack_pointer_rtx,
18152 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18153 RTX_FRAME_RELATED_P (tmp) = 1;
18154 XVECEXP (dwarf, 0, 0) = tmp;
18155
18156 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18157 RTX_FRAME_RELATED_P (tmp) = 1;
18158 XVECEXP (dwarf, 0, 1) = tmp;
18159
18160 for (i = 1; i < count; i++)
18161 {
18162 reg = gen_rtx_REG (DFmode, base_reg);
18163 base_reg += 2;
18164 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18165
18166 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18167 plus_constant (Pmode,
18168 stack_pointer_rtx,
18169 i * 8)),
18170 reg);
18171 RTX_FRAME_RELATED_P (tmp) = 1;
18172 XVECEXP (dwarf, 0, i + 1) = tmp;
18173 }
18174
18175 par = emit_insn (par);
18176 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18177 RTX_FRAME_RELATED_P (par) = 1;
18178
18179 return count * 8;
18180 }
18181
18182 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18183 has the cmse_nonsecure_call attribute; returns false otherwise. */
18184
18185 bool
18186 detect_cmse_nonsecure_call (tree addr)
18187 {
18188 if (!addr)
18189 return FALSE;
18190
18191 tree fntype = TREE_TYPE (addr);
18192 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18193 TYPE_ATTRIBUTES (fntype)))
18194 return TRUE;
18195 return FALSE;
18196 }
18197
18198
18199 /* Emit a call instruction with pattern PAT. ADDR is the address of
18200 the call target. */
18201
18202 void
18203 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18204 {
18205 rtx insn;
18206
18207 insn = emit_call_insn (pat);
18208
18209 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18210 If the call might use such an entry, add a use of the PIC register
18211 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18212 if (TARGET_VXWORKS_RTP
18213 && flag_pic
18214 && !sibcall
18215 && GET_CODE (addr) == SYMBOL_REF
18216 && (SYMBOL_REF_DECL (addr)
18217 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18218 : !SYMBOL_REF_LOCAL_P (addr)))
18219 {
18220 require_pic_register (NULL_RTX, false /*compute_now*/);
18221 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18222 }
18223
18224 if (TARGET_AAPCS_BASED)
18225 {
18226 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18227 linker. We need to add an IP clobber to allow setting
18228 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18229 is not needed since it's a fixed register. */
18230 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18231 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18232 }
18233 }
18234
18235 /* Output a 'call' insn. */
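/* Roughly: this emits "mov lr, pc" followed by either "bx <reg>"
   (interworking or ARMv4T) or "mov pc, <reg>", with a call through LR
   first copied into IP.  */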
18236 const char *
18237 output_call (rtx *operands)
18238 {
18239 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18240
18241 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18242 if (REGNO (operands[0]) == LR_REGNUM)
18243 {
18244 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18245 output_asm_insn ("mov%?\t%0, %|lr", operands);
18246 }
18247
18248 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18249
18250 if (TARGET_INTERWORK || arm_arch4t)
18251 output_asm_insn ("bx%?\t%0", operands);
18252 else
18253 output_asm_insn ("mov%?\t%|pc, %0", operands);
18254
18255 return "";
18256 }
18257
18258 /* Output a move from arm registers to arm registers of a long double
18259 OPERANDS[0] is the destination.
18260 OPERANDS[1] is the source. */
18261 const char *
18262 output_mov_long_double_arm_from_arm (rtx *operands)
18263 {
18264 /* We have to be careful here because the two might overlap. */
18265 int dest_start = REGNO (operands[0]);
18266 int src_start = REGNO (operands[1]);
18267 rtx ops[2];
18268 int i;
18269
18270 if (dest_start < src_start)
18271 {
18272 for (i = 0; i < 3; i++)
18273 {
18274 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18275 ops[1] = gen_rtx_REG (SImode, src_start + i);
18276 output_asm_insn ("mov%?\t%0, %1", ops);
18277 }
18278 }
18279 else
18280 {
18281 for (i = 2; i >= 0; i--)
18282 {
18283 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18284 ops[1] = gen_rtx_REG (SImode, src_start + i);
18285 output_asm_insn ("mov%?\t%0, %1", ops);
18286 }
18287 }
18288
18289 return "";
18290 }
18291
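/* Emit the two-instruction sequence for moving SRC into DEST, i.e. the
   RTL that the 16-bit movw/movt-style patterns are expected to match:
   for a constant, set the low halfword and, if needed, the high
   halfword via a ZERO_EXTRACT; for a symbolic operand, use HIGH and
   LO_SUM.  A REG_EQUAL note records the full value when two insns are
   emitted.  */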
18292 void
18293 arm_emit_movpair (rtx dest, rtx src)
18294 {
18295 /* If the src is an immediate, simplify it. */
18296 if (CONST_INT_P (src))
18297 {
18298 HOST_WIDE_INT val = INTVAL (src);
18299 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18300 if ((val >> 16) & 0x0000ffff)
18301 {
18302 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18303 GEN_INT (16)),
18304 GEN_INT ((val >> 16) & 0x0000ffff));
18305 rtx_insn *insn = get_last_insn ();
18306 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18307 }
18308 return;
18309 }
18310 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18311 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18312 rtx_insn *insn = get_last_insn ();
18313 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18314 }
18315
18316 /* Output a move between double words. It must be REG<-MEM
18317 or MEM<-REG. */
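/* A rough example of the common case, assuming TARGET_LDRD and a plain
   register address: a DImode load from [r2] into r0/r1 is printed as
   "ldrd r0, [r2]", and without LDRD it falls back to
   "ldmia r2, {r0, r1}"; the cases below also cover auto-increment
   addresses and offsets that LDRD/STRD cannot encode.  */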
18318 const char *
18319 output_move_double (rtx *operands, bool emit, int *count)
18320 {
18321 enum rtx_code code0 = GET_CODE (operands[0]);
18322 enum rtx_code code1 = GET_CODE (operands[1]);
18323 rtx otherops[3];
18324 if (count)
18325 *count = 1;
18326
18327 /* The only case when this might happen is when
18328 you are looking at the length of a DImode instruction
18329 that has an invalid constant in it. */
18330 if (code0 == REG && code1 != MEM)
18331 {
18332 gcc_assert (!emit);
18333 *count = 2;
18334 return "";
18335 }
18336
18337 if (code0 == REG)
18338 {
18339 unsigned int reg0 = REGNO (operands[0]);
18340
18341 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18342
18343 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18344
18345 switch (GET_CODE (XEXP (operands[1], 0)))
18346 {
18347 case REG:
18348
18349 if (emit)
18350 {
18351 if (TARGET_LDRD
18352 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18353 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18354 else
18355 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18356 }
18357 break;
18358
18359 case PRE_INC:
18360 gcc_assert (TARGET_LDRD);
18361 if (emit)
18362 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18363 break;
18364
18365 case PRE_DEC:
18366 if (emit)
18367 {
18368 if (TARGET_LDRD)
18369 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18370 else
18371 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18372 }
18373 break;
18374
18375 case POST_INC:
18376 if (emit)
18377 {
18378 if (TARGET_LDRD)
18379 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18380 else
18381 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18382 }
18383 break;
18384
18385 case POST_DEC:
18386 gcc_assert (TARGET_LDRD);
18387 if (emit)
18388 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18389 break;
18390
18391 case PRE_MODIFY:
18392 case POST_MODIFY:
18393 /* Autoincrement addressing modes should never have overlapping
18394 base and destination registers, and overlapping index registers
18395 are already prohibited, so this doesn't need to worry about
18396 fix_cm3_ldrd. */
18397 otherops[0] = operands[0];
18398 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18399 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18400
18401 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18402 {
18403 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18404 {
18405 /* Registers overlap so split out the increment. */
18406 if (emit)
18407 {
18408 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18409 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18410 }
18411 if (count)
18412 *count = 2;
18413 }
18414 else
18415 {
18416 /* Use a single insn if we can.
18417 FIXME: IWMMXT allows offsets larger than ldrd can
18418 handle, fix these up with a pair of ldr. */
18419 if (TARGET_THUMB2
18420 || !CONST_INT_P (otherops[2])
18421 || (INTVAL (otherops[2]) > -256
18422 && INTVAL (otherops[2]) < 256))
18423 {
18424 if (emit)
18425 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18426 }
18427 else
18428 {
18429 if (emit)
18430 {
18431 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18432 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18433 }
18434 if (count)
18435 *count = 2;
18436
18437 }
18438 }
18439 }
18440 else
18441 {
18442 /* Use a single insn if we can.
18443 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18444 fix these up with a pair of ldr. */
18445 if (TARGET_THUMB2
18446 || !CONST_INT_P (otherops[2])
18447 || (INTVAL (otherops[2]) > -256
18448 && INTVAL (otherops[2]) < 256))
18449 {
18450 if (emit)
18451 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18452 }
18453 else
18454 {
18455 if (emit)
18456 {
18457 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18458 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18459 }
18460 if (count)
18461 *count = 2;
18462 }
18463 }
18464 break;
18465
18466 case LABEL_REF:
18467 case CONST:
18468 /* We might be able to use ldrd %0, %1 here. However, the range is
18469 different from that of ldr/adr, and it is broken on some ARMv7-M
18470 implementations. */
18471 /* Use the second register of the pair to avoid problematic
18472 overlap. */
18473 otherops[1] = operands[1];
18474 if (emit)
18475 output_asm_insn ("adr%?\t%0, %1", otherops);
18476 operands[1] = otherops[0];
18477 if (emit)
18478 {
18479 if (TARGET_LDRD)
18480 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18481 else
18482 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18483 }
18484
18485 if (count)
18486 *count = 2;
18487 break;
18488
18489 /* ??? This needs checking for thumb2. */
18490 default:
18491 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18492 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18493 {
18494 otherops[0] = operands[0];
18495 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18496 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18497
18498 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18499 {
18500 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18501 {
18502 switch ((int) INTVAL (otherops[2]))
18503 {
18504 case -8:
18505 if (emit)
18506 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18507 return "";
18508 case -4:
18509 if (TARGET_THUMB2)
18510 break;
18511 if (emit)
18512 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18513 return "";
18514 case 4:
18515 if (TARGET_THUMB2)
18516 break;
18517 if (emit)
18518 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18519 return "";
18520 }
18521 }
18522 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18523 operands[1] = otherops[0];
18524 if (TARGET_LDRD
18525 && (REG_P (otherops[2])
18526 || TARGET_THUMB2
18527 || (CONST_INT_P (otherops[2])
18528 && INTVAL (otherops[2]) > -256
18529 && INTVAL (otherops[2]) < 256)))
18530 {
18531 if (reg_overlap_mentioned_p (operands[0],
18532 otherops[2]))
18533 {
18534 /* Swap base and index registers over to
18535 avoid a conflict. */
18536 std::swap (otherops[1], otherops[2]);
18537 }
18538 /* If both registers conflict, it will usually
18539 have been fixed by a splitter. */
18540 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18541 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18542 {
18543 if (emit)
18544 {
18545 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18546 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18547 }
18548 if (count)
18549 *count = 2;
18550 }
18551 else
18552 {
18553 otherops[0] = operands[0];
18554 if (emit)
18555 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18556 }
18557 return "";
18558 }
18559
18560 if (CONST_INT_P (otherops[2]))
18561 {
18562 if (emit)
18563 {
18564 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18565 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18566 else
18567 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18568 }
18569 }
18570 else
18571 {
18572 if (emit)
18573 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18574 }
18575 }
18576 else
18577 {
18578 if (emit)
18579 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18580 }
18581
18582 if (count)
18583 *count = 2;
18584
18585 if (TARGET_LDRD)
18586 return "ldrd%?\t%0, [%1]";
18587
18588 return "ldmia%?\t%1, %M0";
18589 }
18590 else
18591 {
18592 otherops[1] = adjust_address (operands[1], SImode, 4);
18593 /* Take care of overlapping base/data reg. */
18594 if (reg_mentioned_p (operands[0], operands[1]))
18595 {
18596 if (emit)
18597 {
18598 output_asm_insn ("ldr%?\t%0, %1", otherops);
18599 output_asm_insn ("ldr%?\t%0, %1", operands);
18600 }
18601 if (count)
18602 *count = 2;
18603
18604 }
18605 else
18606 {
18607 if (emit)
18608 {
18609 output_asm_insn ("ldr%?\t%0, %1", operands);
18610 output_asm_insn ("ldr%?\t%0, %1", otherops);
18611 }
18612 if (count)
18613 *count = 2;
18614 }
18615 }
18616 }
18617 }
18618 else
18619 {
18620 /* Constraints should ensure this. */
18621 gcc_assert (code0 == MEM && code1 == REG);
18622 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18623 || (TARGET_ARM && TARGET_LDRD));
18624
18625 /* For TARGET_ARM the first source register of an STRD
18626 must be even. This is usually the case for double-word
18627 values but user assembly constraints can force an odd
18628 starting register. */
18629 bool allow_strd = TARGET_LDRD
18630 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18631 switch (GET_CODE (XEXP (operands[0], 0)))
18632 {
18633 case REG:
18634 if (emit)
18635 {
18636 if (allow_strd)
18637 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18638 else
18639 output_asm_insn ("stm%?\t%m0, %M1", operands);
18640 }
18641 break;
18642
18643 case PRE_INC:
18644 gcc_assert (allow_strd);
18645 if (emit)
18646 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18647 break;
18648
18649 case PRE_DEC:
18650 if (emit)
18651 {
18652 if (allow_strd)
18653 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18654 else
18655 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18656 }
18657 break;
18658
18659 case POST_INC:
18660 if (emit)
18661 {
18662 if (allow_strd)
18663 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18664 else
18665 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18666 }
18667 break;
18668
18669 case POST_DEC:
18670 gcc_assert (allow_strd);
18671 if (emit)
18672 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18673 break;
18674
18675 case PRE_MODIFY:
18676 case POST_MODIFY:
18677 otherops[0] = operands[1];
18678 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18679 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18680
18681 /* IWMMXT allows offsets larger than strd can handle,
18682 fix these up with a pair of str. */
18683 if (!TARGET_THUMB2
18684 && CONST_INT_P (otherops[2])
18685 && (INTVAL(otherops[2]) <= -256
18686 || INTVAL(otherops[2]) >= 256))
18687 {
18688 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18689 {
18690 if (emit)
18691 {
18692 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18693 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18694 }
18695 if (count)
18696 *count = 2;
18697 }
18698 else
18699 {
18700 if (emit)
18701 {
18702 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18703 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18704 }
18705 if (count)
18706 *count = 2;
18707 }
18708 }
18709 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18710 {
18711 if (emit)
18712 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18713 }
18714 else
18715 {
18716 if (emit)
18717 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18718 }
18719 break;
18720
18721 case PLUS:
18722 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18723 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18724 {
18725 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18726 {
18727 case -8:
18728 if (emit)
18729 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18730 return "";
18731
18732 case -4:
18733 if (TARGET_THUMB2)
18734 break;
18735 if (emit)
18736 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18737 return "";
18738
18739 case 4:
18740 if (TARGET_THUMB2)
18741 break;
18742 if (emit)
18743 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18744 return "";
18745 }
18746 }
18747 if (allow_strd
18748 && (REG_P (otherops[2])
18749 || TARGET_THUMB2
18750 || (CONST_INT_P (otherops[2])
18751 && INTVAL (otherops[2]) > -256
18752 && INTVAL (otherops[2]) < 256)))
18753 {
18754 otherops[0] = operands[1];
18755 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18756 if (emit)
18757 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18758 return "";
18759 }
18760 /* Fall through */
18761
18762 default:
18763 otherops[0] = adjust_address (operands[0], SImode, 4);
18764 otherops[1] = operands[1];
18765 if (emit)
18766 {
18767 output_asm_insn ("str%?\t%1, %0", operands);
18768 output_asm_insn ("str%?\t%H1, %0", otherops);
18769 }
18770 if (count)
18771 *count = 2;
18772 }
18773 }
18774
18775 return "";
18776 }
18777
18778 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18779 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
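/* For example (informal, based on the cases below): a load whose address
   is a plain register becomes a single "ldmia rN, {...}" of four core
   registers, and a register-to-register move is expanded into four
   "mov" instructions ordered so that no source is overwritten before it
   is read.  */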
18780
18781 const char *
18782 output_move_quad (rtx *operands)
18783 {
18784 if (REG_P (operands[0]))
18785 {
18786 /* Load, or reg->reg move. */
18787
18788 if (MEM_P (operands[1]))
18789 {
18790 switch (GET_CODE (XEXP (operands[1], 0)))
18791 {
18792 case REG:
18793 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18794 break;
18795
18796 case LABEL_REF:
18797 case CONST:
18798 output_asm_insn ("adr%?\t%0, %1", operands);
18799 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18800 break;
18801
18802 default:
18803 gcc_unreachable ();
18804 }
18805 }
18806 else
18807 {
18808 rtx ops[2];
18809 int dest, src, i;
18810
18811 gcc_assert (REG_P (operands[1]));
18812
18813 dest = REGNO (operands[0]);
18814 src = REGNO (operands[1]);
18815
18816 /* This seems pretty dumb, but hopefully GCC won't try to do it
18817 very often. */
18818 if (dest < src)
18819 for (i = 0; i < 4; i++)
18820 {
18821 ops[0] = gen_rtx_REG (SImode, dest + i);
18822 ops[1] = gen_rtx_REG (SImode, src + i);
18823 output_asm_insn ("mov%?\t%0, %1", ops);
18824 }
18825 else
18826 for (i = 3; i >= 0; i--)
18827 {
18828 ops[0] = gen_rtx_REG (SImode, dest + i);
18829 ops[1] = gen_rtx_REG (SImode, src + i);
18830 output_asm_insn ("mov%?\t%0, %1", ops);
18831 }
18832 }
18833 }
18834 else
18835 {
18836 gcc_assert (MEM_P (operands[0]));
18837 gcc_assert (REG_P (operands[1]));
18838 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18839
18840 switch (GET_CODE (XEXP (operands[0], 0)))
18841 {
18842 case REG:
18843 output_asm_insn ("stm%?\t%m0, %M1", operands);
18844 break;
18845
18846 default:
18847 gcc_unreachable ();
18848 }
18849 }
18850
18851 return "";
18852 }
18853
18854 /* Output a VFP load or store instruction. */
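/* An informal sketch of the templates below: a plain SFmode load prints
   as "vldr.32 sN, [...]", a DFmode store with POST_INC as
   "vstmia.64 rN!, {dM}", and integer modes get a trailing assembly
   comment marking them as such.  */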
18855
18856 const char *
18857 output_move_vfp (rtx *operands)
18858 {
18859 rtx reg, mem, addr, ops[2];
18860 int load = REG_P (operands[0]);
18861 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18862 int sp = (!TARGET_VFP_FP16INST
18863 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18864 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18865 const char *templ;
18866 char buff[50];
18867 machine_mode mode;
18868
18869 reg = operands[!load];
18870 mem = operands[load];
18871
18872 mode = GET_MODE (reg);
18873
18874 gcc_assert (REG_P (reg));
18875 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18876 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18877 || mode == SFmode
18878 || mode == DFmode
18879 || mode == HImode
18880 || mode == SImode
18881 || mode == DImode
18882 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18883 gcc_assert (MEM_P (mem));
18884
18885 addr = XEXP (mem, 0);
18886
18887 switch (GET_CODE (addr))
18888 {
18889 case PRE_DEC:
18890 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18891 ops[0] = XEXP (addr, 0);
18892 ops[1] = reg;
18893 break;
18894
18895 case POST_INC:
18896 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18897 ops[0] = XEXP (addr, 0);
18898 ops[1] = reg;
18899 break;
18900
18901 default:
18902 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18903 ops[0] = reg;
18904 ops[1] = mem;
18905 break;
18906 }
18907
18908 sprintf (buff, templ,
18909 load ? "ld" : "st",
18910 dp ? "64" : sp ? "32" : "16",
18911 dp ? "P" : "",
18912 integer_p ? "\t%@ int" : "");
18913 output_asm_insn (buff, ops);
18914
18915 return "";
18916 }
18917
18918 /* Output a Neon double-word or quad-word load or store, or a load
18919 or store for larger structure modes.
18920
18921 WARNING: The ordering of elements is weird in big-endian mode,
18922 because the EABI requires that vectors stored in memory appear
18923 as though they were stored by a VSTM.
18924 GCC RTL defines element ordering based on in-memory order.
18925 This can be different from the architectural ordering of elements
18926 within a NEON register. The intrinsics defined in arm_neon.h use the
18927 NEON register element ordering, not the GCC RTL element ordering.
18928
18929 For example, the in-memory ordering of a big-endian quadword
18930 vector with 16-bit elements when stored from register pair {d0,d1}
18931 will be (lowest address first, d0[N] is NEON register element N):
18932
18933 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18934
18935 When necessary, quadword registers (dN, dN+1) are moved to ARM
18936 registers from rN in the order:
18937
18938 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18939
18940 So that STM/LDM can be used on vectors in ARM registers, and the
18941 same memory layout will result as if VSTM/VLDM were used.
18942
18943 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18944 possible, which allows use of appropriate alignment tags.
18945 Note that the choice of "64" is independent of the actual vector
18946 element size; this size simply ensures that the behavior is
18947 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18948
18949 Due to limitations of those instructions, use of VST1.64/VLD1.64
18950 is not possible if:
18951 - the address contains PRE_DEC, or
18952 - the mode refers to more than 4 double-word registers
18953
18954 In those cases, it would be possible to replace VSTM/VLDM by a
18955 sequence of instructions; this is not currently implemented since
18956 this is not certain to actually improve performance. */
18957
18958 const char *
18959 output_move_neon (rtx *operands)
18960 {
18961 rtx reg, mem, addr, ops[2];
18962 int regno, nregs, load = REG_P (operands[0]);
18963 const char *templ;
18964 char buff[50];
18965 machine_mode mode;
18966
18967 reg = operands[!load];
18968 mem = operands[load];
18969
18970 mode = GET_MODE (reg);
18971
18972 gcc_assert (REG_P (reg));
18973 regno = REGNO (reg);
18974 nregs = REG_NREGS (reg) / 2;
18975 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18976 || NEON_REGNO_OK_FOR_QUAD (regno));
18977 gcc_assert (VALID_NEON_DREG_MODE (mode)
18978 || VALID_NEON_QREG_MODE (mode)
18979 || VALID_NEON_STRUCT_MODE (mode));
18980 gcc_assert (MEM_P (mem));
18981
18982 addr = XEXP (mem, 0);
18983
18984 /* Strip off const from addresses like (const (plus (...))). */
18985 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18986 addr = XEXP (addr, 0);
18987
18988 switch (GET_CODE (addr))
18989 {
18990 case POST_INC:
18991 /* We have to use vldm / vstm for too-large modes. */
18992 if (nregs > 4)
18993 {
18994 templ = "v%smia%%?\t%%0!, %%h1";
18995 ops[0] = XEXP (addr, 0);
18996 }
18997 else
18998 {
18999 templ = "v%s1.64\t%%h1, %%A0";
19000 ops[0] = mem;
19001 }
19002 ops[1] = reg;
19003 break;
19004
19005 case PRE_DEC:
19006 /* We have to use vldm / vstm in this case, since there is no
19007 pre-decrement form of the vld1 / vst1 instructions. */
19008 templ = "v%smdb%%?\t%%0!, %%h1";
19009 ops[0] = XEXP (addr, 0);
19010 ops[1] = reg;
19011 break;
19012
19013 case POST_MODIFY:
19014 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19015 gcc_unreachable ();
19016
19017 case REG:
19018 /* We have to use vldm / vstm for too-large modes. */
19019 if (nregs > 1)
19020 {
19021 if (nregs > 4)
19022 templ = "v%smia%%?\t%%m0, %%h1";
19023 else
19024 templ = "v%s1.64\t%%h1, %%A0";
19025
19026 ops[0] = mem;
19027 ops[1] = reg;
19028 break;
19029 }
19030 /* Fall through. */
19031 case LABEL_REF:
19032 case PLUS:
19033 {
19034 int i;
19035 int overlap = -1;
19036 for (i = 0; i < nregs; i++)
19037 {
19038 /* We're only using DImode here because it's a convenient size. */
19039 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19040 ops[1] = adjust_address (mem, DImode, 8 * i);
19041 if (reg_overlap_mentioned_p (ops[0], mem))
19042 {
19043 gcc_assert (overlap == -1);
19044 overlap = i;
19045 }
19046 else
19047 {
19048 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19049 output_asm_insn (buff, ops);
19050 }
19051 }
19052 if (overlap != -1)
19053 {
19054 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19055 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19056 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19057 output_asm_insn (buff, ops);
19058 }
19059
19060 return "";
19061 }
19062
19063 default:
19064 gcc_unreachable ();
19065 }
19066
19067 sprintf (buff, templ, load ? "ld" : "st");
19068 output_asm_insn (buff, ops);
19069
19070 return "";
19071 }
19072
19073 /* Compute and return the length of neon_mov<mode>, where <mode> is
19074 one of VSTRUCT modes: EI, OI, CI or XI. */
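/* In short: register-to-register moves have a fixed length per mode
   (8, 12 or 16 bytes), loads and stores through LABEL_REF or PLUS
   addresses take one 4-byte vldr/vstr per D register occupied, and
   everything else is a single 4-byte instruction.  */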
19075 int
19076 arm_attr_length_move_neon (rtx_insn *insn)
19077 {
19078 rtx reg, mem, addr;
19079 int load;
19080 machine_mode mode;
19081
19082 extract_insn_cached (insn);
19083
19084 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19085 {
19086 mode = GET_MODE (recog_data.operand[0]);
19087 switch (mode)
19088 {
19089 case E_EImode:
19090 case E_OImode:
19091 return 8;
19092 case E_CImode:
19093 return 12;
19094 case E_XImode:
19095 return 16;
19096 default:
19097 gcc_unreachable ();
19098 }
19099 }
19100
19101 load = REG_P (recog_data.operand[0]);
19102 reg = recog_data.operand[!load];
19103 mem = recog_data.operand[load];
19104
19105 gcc_assert (MEM_P (mem));
19106
19107 addr = XEXP (mem, 0);
19108
19109 /* Strip off const from addresses like (const (plus (...))). */
19110 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19111 addr = XEXP (addr, 0);
19112
19113 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19114 {
19115 int insns = REG_NREGS (reg) / 2;
19116 return insns * 4;
19117 }
19118 else
19119 return 4;
19120 }
19121
19122 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19123 return zero. */
19124
19125 int
19126 arm_address_offset_is_imm (rtx_insn *insn)
19127 {
19128 rtx mem, addr;
19129
19130 extract_insn_cached (insn);
19131
19132 if (REG_P (recog_data.operand[0]))
19133 return 0;
19134
19135 mem = recog_data.operand[0];
19136
19137 gcc_assert (MEM_P (mem));
19138
19139 addr = XEXP (mem, 0);
19140
19141 if (REG_P (addr)
19142 || (GET_CODE (addr) == PLUS
19143 && REG_P (XEXP (addr, 0))
19144 && CONST_INT_P (XEXP (addr, 1))))
19145 return 1;
19146 else
19147 return 0;
19148 }
19149
19150 /* Output an ADD r, s, #n where n may be too big for one instruction.
19151 If adding zero to one register, output nothing. */
19152 const char *
19153 output_add_immediate (rtx *operands)
19154 {
19155 HOST_WIDE_INT n = INTVAL (operands[2]);
19156
19157 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19158 {
19159 if (n < 0)
19160 output_multi_immediate (operands,
19161 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19162 -n);
19163 else
19164 output_multi_immediate (operands,
19165 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19166 n);
19167 }
19168
19169 return "";
19170 }
19171
19172 /* Output a multiple immediate operation.
19173 OPERANDS is the vector of operands referred to in the output patterns.
19174 INSTR1 is the output pattern to use for the first constant.
19175 INSTR2 is the output pattern to use for subsequent constants.
19176 IMMED_OP is the index of the constant slot in OPERANDS.
19177 N is the constant value. */
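/* Worked example (operand registers assumed): for N = 0x10004 the loop
   below picks out 8-bit chunks anchored at even bit positions and emits
   two instructions, e.g. "add r0, r1, #4" followed by
   "add r0, r0, #65536".  */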
19178 static const char *
19179 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19180 int immed_op, HOST_WIDE_INT n)
19181 {
19182 #if HOST_BITS_PER_WIDE_INT > 32
19183 n &= 0xffffffff;
19184 #endif
19185
19186 if (n == 0)
19187 {
19188 /* Quick and easy output. */
19189 operands[immed_op] = const0_rtx;
19190 output_asm_insn (instr1, operands);
19191 }
19192 else
19193 {
19194 int i;
19195 const char * instr = instr1;
19196
19197 /* Note that n is never zero here (which would give no output). */
19198 for (i = 0; i < 32; i += 2)
19199 {
19200 if (n & (3 << i))
19201 {
19202 operands[immed_op] = GEN_INT (n & (255 << i));
19203 output_asm_insn (instr, operands);
19204 instr = instr2;
19205 i += 6;
19206 }
19207 }
19208 }
19209
19210 return "";
19211 }
19212
19213 /* Return the name of a shifter operation. */
19214 static const char *
19215 arm_shift_nmem(enum rtx_code code)
19216 {
19217 switch (code)
19218 {
19219 case ASHIFT:
19220 return ARM_LSL_NAME;
19221
19222 case ASHIFTRT:
19223 return "asr";
19224
19225 case LSHIFTRT:
19226 return "lsr";
19227
19228 case ROTATERT:
19229 return "ror";
19230
19231 default:
19232 abort();
19233 }
19234 }
19235
19236 /* Return the appropriate ARM instruction for the operation code.
19237 The returned result should not be overwritten. OP is the rtx of the
19238 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19239 was shifted. */
19240 const char *
19241 arithmetic_instr (rtx op, int shift_first_arg)
19242 {
19243 switch (GET_CODE (op))
19244 {
19245 case PLUS:
19246 return "add";
19247
19248 case MINUS:
19249 return shift_first_arg ? "rsb" : "sub";
19250
19251 case IOR:
19252 return "orr";
19253
19254 case XOR:
19255 return "eor";
19256
19257 case AND:
19258 return "and";
19259
19260 case ASHIFT:
19261 case ASHIFTRT:
19262 case LSHIFTRT:
19263 case ROTATERT:
19264 return arm_shift_nmem(GET_CODE(op));
19265
19266 default:
19267 gcc_unreachable ();
19268 }
19269 }
19270
19271 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19272 for the operation code. The returned result should not be overwritten.
19273 OP is the rtx code of the shift.
19274 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19275 constant shift amount if the shift is by a constant. */
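/* For example, a (mult x 8) operand is printed as an "lsl" by 3 and a
   (rotate x 24) becomes a "ror" by 8, while a shift by a register
   leaves *AMOUNTP at -1 to signal that the amount is not constant.  */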
19276 static const char *
19277 shift_op (rtx op, HOST_WIDE_INT *amountp)
19278 {
19279 const char * mnem;
19280 enum rtx_code code = GET_CODE (op);
19281
19282 switch (code)
19283 {
19284 case ROTATE:
19285 if (!CONST_INT_P (XEXP (op, 1)))
19286 {
19287 output_operand_lossage ("invalid shift operand");
19288 return NULL;
19289 }
19290
19291 code = ROTATERT;
19292 *amountp = 32 - INTVAL (XEXP (op, 1));
19293 mnem = "ror";
19294 break;
19295
19296 case ASHIFT:
19297 case ASHIFTRT:
19298 case LSHIFTRT:
19299 case ROTATERT:
19300 mnem = arm_shift_nmem(code);
19301 if (CONST_INT_P (XEXP (op, 1)))
19302 {
19303 *amountp = INTVAL (XEXP (op, 1));
19304 }
19305 else if (REG_P (XEXP (op, 1)))
19306 {
19307 *amountp = -1;
19308 return mnem;
19309 }
19310 else
19311 {
19312 output_operand_lossage ("invalid shift operand");
19313 return NULL;
19314 }
19315 break;
19316
19317 case MULT:
19318 /* We never have to worry about the amount being other than a
19319 power of 2, since this case can never be reloaded from a reg. */
19320 if (!CONST_INT_P (XEXP (op, 1)))
19321 {
19322 output_operand_lossage ("invalid shift operand");
19323 return NULL;
19324 }
19325
19326 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19327
19328 /* Amount must be a power of two. */
19329 if (*amountp & (*amountp - 1))
19330 {
19331 output_operand_lossage ("invalid shift operand");
19332 return NULL;
19333 }
19334
19335 *amountp = exact_log2 (*amountp);
19336 gcc_assert (IN_RANGE (*amountp, 0, 31));
19337 return ARM_LSL_NAME;
19338
19339 default:
19340 output_operand_lossage ("invalid shift operand");
19341 return NULL;
19342 }
19343
19344 /* This is not 100% correct, but follows from the desire to merge
19345 multiplication by a power of 2 with the recognizer for a
19346 shift. >=32 is not a valid shift for "lsl", so we must try and
19347 output a shift that produces the correct arithmetical result.
19348 Using lsr #32 is identical except for the fact that the carry bit
19349 is not set correctly if we set the flags; but we never use the
19350 carry bit from such an operation, so we can ignore that. */
19351 if (code == ROTATERT)
19352 /* Rotate is just modulo 32. */
19353 *amountp &= 31;
19354 else if (*amountp != (*amountp & 31))
19355 {
19356 if (code == ASHIFT)
19357 mnem = "lsr";
19358 *amountp = 32;
19359 }
19360
19361 /* Shifts of 0 are no-ops. */
19362 if (*amountp == 0)
19363 return NULL;
19364
19365 return mnem;
19366 }
19367
19368 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19369 because /bin/as is horribly restrictive. The judgement about
19370 whether or not each character is 'printable' (and can be output as
19371 is) or not (and must be printed with an octal escape) must be made
19372 with reference to the *host* character set -- the situation is
19373 similar to that discussed in the comments above pp_c_char in
19374 c-pretty-print.c. */
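/* For instance, the three bytes 'a', '"', 0x07 are emitted as
   .ascii "a\"\007", and the string is split across several .ascii
   directives once roughly MAX_ASCII_LEN characters have been printed on
   one line.  */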
19375
19376 #define MAX_ASCII_LEN 51
19377
19378 void
19379 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19380 {
19381 int i;
19382 int len_so_far = 0;
19383
19384 fputs ("\t.ascii\t\"", stream);
19385
19386 for (i = 0; i < len; i++)
19387 {
19388 int c = p[i];
19389
19390 if (len_so_far >= MAX_ASCII_LEN)
19391 {
19392 fputs ("\"\n\t.ascii\t\"", stream);
19393 len_so_far = 0;
19394 }
19395
19396 if (ISPRINT (c))
19397 {
19398 if (c == '\\' || c == '\"')
19399 {
19400 putc ('\\', stream);
19401 len_so_far++;
19402 }
19403 putc (c, stream);
19404 len_so_far++;
19405 }
19406 else
19407 {
19408 fprintf (stream, "\\%03o", c);
19409 len_so_far += 4;
19410 }
19411 }
19412
19413 fputs ("\"\n", stream);
19414 }
19415 \f
19416
19417 /* Compute the register save mask for registers 0 through 12
19418 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19419
19420 static unsigned long
19421 arm_compute_save_reg0_reg12_mask (void)
19422 {
19423 unsigned long func_type = arm_current_func_type ();
19424 unsigned long save_reg_mask = 0;
19425 unsigned int reg;
19426
19427 if (IS_INTERRUPT (func_type))
19428 {
19429 unsigned int max_reg;
19430 /* Interrupt functions must not corrupt any registers,
19431 even call clobbered ones. If this is a leaf function
19432 we can just examine the registers used by the RTL, but
19433 otherwise we have to assume that whatever function is
19434 called might clobber anything, and so we have to save
19435 all the call-clobbered registers as well. */
19436 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19437 /* FIQ handlers have registers r8 - r12 banked, so
19438 we only need to check r0 - r7. Normal ISRs only
19439 bank r14 and r15, so we must check up to r12.
19440 r13 is the stack pointer which is always preserved,
19441 so we do not need to consider it here. */
19442 max_reg = 7;
19443 else
19444 max_reg = 12;
19445
19446 for (reg = 0; reg <= max_reg; reg++)
19447 if (df_regs_ever_live_p (reg)
19448 || (! crtl->is_leaf && call_used_regs[reg]))
19449 save_reg_mask |= (1 << reg);
19450
19451 /* Also save the pic base register if necessary. */
19452 if (flag_pic
19453 && !TARGET_SINGLE_PIC_BASE
19454 && arm_pic_register != INVALID_REGNUM
19455 && crtl->uses_pic_offset_table)
19456 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19457 }
19458 else if (IS_VOLATILE(func_type))
19459 {
19460 /* For noreturn functions we historically omitted register saves
19461 altogether. However, this really messes up debugging. As a
19462 compromise save just the frame pointers. Combined with the link
19463 register saved elsewhere this should be sufficient to get
19464 a backtrace. */
19465 if (frame_pointer_needed)
19466 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19467 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19468 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19469 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19470 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19471 }
19472 else
19473 {
19474 /* In the normal case we only need to save those registers
19475 which are call saved and which are used by this function. */
19476 for (reg = 0; reg <= 11; reg++)
19477 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19478 save_reg_mask |= (1 << reg);
19479
19480 /* Handle the frame pointer as a special case. */
19481 if (frame_pointer_needed)
19482 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19483
19484 /* If we aren't loading the PIC register,
19485 don't stack it even though it may be live. */
19486 if (flag_pic
19487 && !TARGET_SINGLE_PIC_BASE
19488 && arm_pic_register != INVALID_REGNUM
19489 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19490 || crtl->uses_pic_offset_table))
19491 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19492
19493 /* The prologue will copy SP into R0, so save it. */
19494 if (IS_STACKALIGN (func_type))
19495 save_reg_mask |= 1;
19496 }
19497
19498 /* Save registers so the exception handler can modify them. */
19499 if (crtl->calls_eh_return)
19500 {
19501 unsigned int i;
19502
19503 for (i = 0; ; i++)
19504 {
19505 reg = EH_RETURN_DATA_REGNO (i);
19506 if (reg == INVALID_REGNUM)
19507 break;
19508 save_reg_mask |= 1 << reg;
19509 }
19510 }
19511
19512 return save_reg_mask;
19513 }
19514
19515 /* Return true if r3 is live at the start of the function. */
19516
19517 static bool
19518 arm_r3_live_at_start_p (void)
19519 {
19520 /* Just look at cfg info, which is still close enough to correct at this
19521 point. This gives false positives for broken functions that might use
19522 uninitialized data that happens to be allocated in r3, but who cares? */
19523 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19524 }
19525
19526 /* Compute the number of bytes used to store the static chain register on the
19527 stack, above the stack frame. We need to know this accurately to get the
19528 alignment of the rest of the stack frame correct. */
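/* In practice the result is either 0 or 4: 4 when the prologue of a
   nested function must stash the static chain above the frame and r3 is
   live on entry (the exact conditions are tested below), 0 otherwise.  */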
19529
19530 static int
19531 arm_compute_static_chain_stack_bytes (void)
19532 {
19533 /* Once the value is updated from the init value of -1, do not
19534 re-compute. */
19535 if (cfun->machine->static_chain_stack_bytes != -1)
19536 return cfun->machine->static_chain_stack_bytes;
19537
19538 /* See the defining assertion in arm_expand_prologue. */
19539 if (IS_NESTED (arm_current_func_type ())
19540 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19541 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19542 || flag_stack_clash_protection)
19543 && !df_regs_ever_live_p (LR_REGNUM)))
19544 && arm_r3_live_at_start_p ()
19545 && crtl->args.pretend_args_size == 0)
19546 return 4;
19547
19548 return 0;
19549 }
19550
19551 /* Compute a bit mask of which core registers need to be
19552 saved on the stack for the current function.
19553 This is used by arm_compute_frame_layout, which may add extra registers. */
19554
19555 static unsigned long
19556 arm_compute_save_core_reg_mask (void)
19557 {
19558 unsigned int save_reg_mask = 0;
19559 unsigned long func_type = arm_current_func_type ();
19560 unsigned int reg;
19561
19562 if (IS_NAKED (func_type))
19563 /* This should never really happen. */
19564 return 0;
19565
19566 /* If we are creating a stack frame, then we must save the frame pointer,
19567 IP (which will hold the old stack pointer), LR and the PC. */
19568 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19569 save_reg_mask |=
19570 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19571 | (1 << IP_REGNUM)
19572 | (1 << LR_REGNUM)
19573 | (1 << PC_REGNUM);
19574
19575 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19576
19577 /* Decide if we need to save the link register.
19578 Interrupt routines have their own banked link register,
19579 so they never need to save it.
19580 Otherwise if we do not use the link register we do not need to save
19581 it. If we are pushing other registers onto the stack however, we
19582 can save an instruction in the epilogue by pushing the link register
19583 now and then popping it back into the PC. This incurs extra memory
19584 accesses though, so we only do it when optimizing for size, and only
19585 if we know that we will not need a fancy return sequence. */
19586 if (df_regs_ever_live_p (LR_REGNUM)
19587 || (save_reg_mask
19588 && optimize_size
19589 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19590 && !crtl->tail_call_emit
19591 && !crtl->calls_eh_return))
19592 save_reg_mask |= 1 << LR_REGNUM;
19593
19594 if (cfun->machine->lr_save_eliminated)
19595 save_reg_mask &= ~ (1 << LR_REGNUM);
19596
19597 if (TARGET_REALLY_IWMMXT
19598 && ((bit_count (save_reg_mask)
19599 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19600 arm_compute_static_chain_stack_bytes())
19601 ) % 2) != 0)
19602 {
19603 /* The total number of registers that are going to be pushed
19604 onto the stack is odd. We need to ensure that the stack
19605 is 64-bit aligned before we start to save iWMMXt registers,
19606 and also before we start to create locals. (A local variable
19607 might be a double or long long which we will load/store using
19608 an iWMMXt instruction). Therefore we need to push another
19609 ARM register, so that the stack will be 64-bit aligned. We
19610 try to avoid using the arg registers (r0 - r3) as they might be
19611 used to pass values in a tail call. */
19612 for (reg = 4; reg <= 12; reg++)
19613 if ((save_reg_mask & (1 << reg)) == 0)
19614 break;
19615
19616 if (reg <= 12)
19617 save_reg_mask |= (1 << reg);
19618 else
19619 {
19620 cfun->machine->sibcall_blocked = 1;
19621 save_reg_mask |= (1 << 3);
19622 }
19623 }
19624
19625 /* We may need to push an additional register for use initializing the
19626 PIC base register. */
19627 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19628 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19629 {
19630 reg = thumb_find_work_register (1 << 4);
19631 if (!call_used_regs[reg])
19632 save_reg_mask |= (1 << reg);
19633 }
19634
19635 return save_reg_mask;
19636 }
19637
19638 /* Compute a bit mask of which core registers need to be
19639 saved on the stack for the current function. */
19640 static unsigned long
19641 thumb1_compute_save_core_reg_mask (void)
19642 {
19643 unsigned long mask;
19644 unsigned reg;
19645
19646 mask = 0;
19647 for (reg = 0; reg < 12; reg ++)
19648 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19649 mask |= 1 << reg;
19650
19651 /* Handle the frame pointer as a special case. */
19652 if (frame_pointer_needed)
19653 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19654
19655 if (flag_pic
19656 && !TARGET_SINGLE_PIC_BASE
19657 && arm_pic_register != INVALID_REGNUM
19658 && crtl->uses_pic_offset_table)
19659 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19660
19661 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19662 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19663 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19664
19665 /* LR will also be pushed if any lo regs are pushed. */
19666 if (mask & 0xff || thumb_force_lr_save ())
19667 mask |= (1 << LR_REGNUM);
19668
19669 bool call_clobbered_scratch
19670 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19671 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19672
19673 /* Make sure we have a low work register if we need one. We will
19674 need one if we are going to push a high register, but we are not
19675 currently intending to push a low register. However if both the
19676 prologue and epilogue have a spare call-clobbered low register,
19677 then we won't need to find an additional work register. It does
19678 not need to be the same register in the prologue and
19679 epilogue. */
19680 if ((mask & 0xff) == 0
19681 && !call_clobbered_scratch
19682 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19683 {
19684 /* Use thumb_find_work_register to choose which register
19685 we will use. If the register is live then we will
19686 have to push it. Use LAST_LO_REGNUM as our fallback
19687 choice for the register to select. */
19688 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19689 /* Make sure the register returned by thumb_find_work_register is
19690 not part of the return value. */
19691 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19692 reg = LAST_LO_REGNUM;
19693
19694 if (callee_saved_reg_p (reg))
19695 mask |= 1 << reg;
19696 }
19697
19698 /* The 504 below is 8 bytes less than 512 because there are two possible
19699 alignment words. We can't tell here if they will be present or not so we
19700 have to play it safe and assume that they are. */
19701 if ((CALLER_INTERWORKING_SLOT_SIZE +
19702 ROUND_UP_WORD (get_frame_size ()) +
19703 crtl->outgoing_args_size) >= 504)
19704 {
19705 /* This is the same as the code in thumb1_expand_prologue() which
19706 determines which register to use for stack decrement. */
19707 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19708 if (mask & (1 << reg))
19709 break;
19710
19711 if (reg > LAST_LO_REGNUM)
19712 {
19713 /* Make sure we have a register available for stack decrement. */
19714 mask |= 1 << LAST_LO_REGNUM;
19715 }
19716 }
19717
19718 return mask;
19719 }
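/* A rough worked example of the 504 check above (an illustration only, not
   taken from any particular function): with CALLER_INTERWORKING_SLOT_SIZE
   of 4, a frame that rounds up to 496 bytes and 8 bytes of outgoing
   arguments, the total is 4 + 496 + 8 = 508 >= 504, so if none of the low
   registers r4-r7 is already in the mask, LAST_LO_REGNUM is added so that
   a register is available for the stack decrement.  With a 488-byte frame
   the total is only 500 and no extra register is reserved.  */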
19720
19721
19722 /* Return the number of bytes required to save VFP registers. */
19723 static int
19724 arm_get_vfp_saved_size (void)
19725 {
19726 unsigned int regno;
19727 int count;
19728 int saved;
19729
19730 saved = 0;
19731 /* Space for saved VFP registers. */
19732 if (TARGET_HARD_FLOAT)
19733 {
19734 count = 0;
19735 for (regno = FIRST_VFP_REGNUM;
19736 regno < LAST_VFP_REGNUM;
19737 regno += 2)
19738 {
19739 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19740 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19741 {
19742 if (count > 0)
19743 {
19744 /* Workaround ARM10 VFPr1 bug. */
19745 if (count == 2 && !arm_arch6)
19746 count++;
19747 saved += count * 8;
19748 }
19749 count = 0;
19750 }
19751 else
19752 count++;
19753 }
19754 if (count > 0)
19755 {
19756 if (count == 2 && !arm_arch6)
19757 count++;
19758 saved += count * 8;
19759 }
19760 }
19761 return saved;
19762 }
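/* A rough illustration of the sizing above (not tied to any particular
   function): if only the call-saved pair d8/d9 (s16-s19) is ever live,
   the loop flushes a run with count == 2, giving 2 * 8 = 16 bytes on
   arm_arch6, or 24 bytes on older cores where the ARM10 VFPr1 workaround
   pads a 2-register block to 3.  */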
19763
19764
19765 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19766 everything bar the final return instruction. If simple_return is true,
19767 then do not output the epilogue, because it has already been emitted in RTL.
19768
19769 Note: do not forget to update length attribute of corresponding insn pattern
19770 when changing assembly output (e.g. length attribute of
19771 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19772 register clearing sequences). */
19773 const char *
19774 output_return_instruction (rtx operand, bool really_return, bool reverse,
19775 bool simple_return)
19776 {
19777 char conditional[10];
19778 char instr[100];
19779 unsigned reg;
19780 unsigned long live_regs_mask;
19781 unsigned long func_type;
19782 arm_stack_offsets *offsets;
19783
19784 func_type = arm_current_func_type ();
19785
19786 if (IS_NAKED (func_type))
19787 return "";
19788
19789 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19790 {
19791 /* If this function was declared non-returning, and we have
19792 found a tail call, then we have to trust that the called
19793 function won't return. */
19794 if (really_return)
19795 {
19796 rtx ops[2];
19797
19798 /* Otherwise, trap an attempted return by aborting. */
19799 ops[0] = operand;
19800 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19801 : "abort");
19802 assemble_external_libcall (ops[1]);
19803 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19804 }
19805
19806 return "";
19807 }
19808
19809 gcc_assert (!cfun->calls_alloca || really_return);
19810
19811 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19812
19813 cfun->machine->return_used_this_function = 1;
19814
19815 offsets = arm_get_frame_offsets ();
19816 live_regs_mask = offsets->saved_regs_mask;
19817
19818 if (!simple_return && live_regs_mask)
19819 {
19820 const char * return_reg;
19821
19822 /* If we do not have any special requirements for function exit
19823 (e.g. interworking) then we can load the return address
19824 directly into the PC. Otherwise we must load it into LR. */
19825 if (really_return
19826 && !IS_CMSE_ENTRY (func_type)
19827 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19828 return_reg = reg_names[PC_REGNUM];
19829 else
19830 return_reg = reg_names[LR_REGNUM];
19831
19832 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19833 {
19834 /* There are three possible reasons for the IP register
19835 being saved. 1) a stack frame was created, in which case
19836 IP contains the old stack pointer, or 2) an ISR routine
19837 corrupted it, or 3) it was saved to align the stack on
19838 iWMMXt. In case 1, restore IP into SP, otherwise just
19839 restore IP. */
19840 if (frame_pointer_needed)
19841 {
19842 live_regs_mask &= ~ (1 << IP_REGNUM);
19843 live_regs_mask |= (1 << SP_REGNUM);
19844 }
19845 else
19846 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19847 }
19848
19849 /* On some ARM architectures it is faster to use LDR rather than
19850 LDM to load a single register. On other architectures, the
19851 cost is the same. In 26 bit mode, or for exception handlers,
19852 we have to use LDM to load the PC so that the CPSR is also
19853 restored. */
19854 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19855 if (live_regs_mask == (1U << reg))
19856 break;
19857
19858 if (reg <= LAST_ARM_REGNUM
19859 && (reg != LR_REGNUM
19860 || ! really_return
19861 || ! IS_INTERRUPT (func_type)))
19862 {
19863 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19864 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19865 }
19866 else
19867 {
19868 char *p;
19869 int first = 1;
19870
19871 /* Generate the load multiple instruction to restore the
19872 registers. Note we can get here, even if
19873 frame_pointer_needed is true, but only if sp already
19874 points to the base of the saved core registers. */
19875 if (live_regs_mask & (1 << SP_REGNUM))
19876 {
19877 unsigned HOST_WIDE_INT stack_adjust;
19878
19879 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19880 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19881
19882 if (stack_adjust && arm_arch5t && TARGET_ARM)
19883 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19884 else
19885 {
19886 /* If we can't use ldmib (SA110 bug),
19887 then try to pop r3 instead. */
19888 if (stack_adjust)
19889 live_regs_mask |= 1 << 3;
19890
19891 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19892 }
19893 }
19894 /* For interrupt returns we have to use an LDM rather than
19895 a POP so that we can use the exception return variant. */
19896 else if (IS_INTERRUPT (func_type))
19897 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19898 else
19899 sprintf (instr, "pop%s\t{", conditional);
19900
19901 p = instr + strlen (instr);
19902
19903 for (reg = 0; reg <= SP_REGNUM; reg++)
19904 if (live_regs_mask & (1 << reg))
19905 {
19906 int l = strlen (reg_names[reg]);
19907
19908 if (first)
19909 first = 0;
19910 else
19911 {
19912 memcpy (p, ", ", 2);
19913 p += 2;
19914 }
19915
19916 memcpy (p, "%|", 2);
19917 memcpy (p + 2, reg_names[reg], l);
19918 p += l + 2;
19919 }
19920
19921 if (live_regs_mask & (1 << LR_REGNUM))
19922 {
19923 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19924 /* If returning from an interrupt, restore the CPSR. */
19925 if (IS_INTERRUPT (func_type))
19926 strcat (p, "^");
19927 }
19928 else
19929 strcpy (p, "}");
19930 }
19931
19932 output_asm_insn (instr, & operand);
19933
19934 /* See if we need to generate an extra instruction to
19935 perform the actual function return. */
19936 if (really_return
19937 && func_type != ARM_FT_INTERWORKED
19938 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19939 {
19940 /* The return has already been handled
19941 by loading the LR into the PC. */
19942 return "";
19943 }
19944 }
19945
19946 if (really_return)
19947 {
19948 switch ((int) ARM_FUNC_TYPE (func_type))
19949 {
19950 case ARM_FT_ISR:
19951 case ARM_FT_FIQ:
19952 /* ??? This is wrong for unified assembly syntax. */
19953 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19954 break;
19955
19956 case ARM_FT_INTERWORKED:
19957 gcc_assert (arm_arch5t || arm_arch4t);
19958 sprintf (instr, "bx%s\t%%|lr", conditional);
19959 break;
19960
19961 case ARM_FT_EXCEPTION:
19962 /* ??? This is wrong for unified assembly syntax. */
19963 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19964 break;
19965
19966 default:
19967 if (IS_CMSE_ENTRY (func_type))
19968 {
19969 /* Check if we have to clear the 'GE bits', which are only used if
19970 parallel add and subtraction instructions are available. */
19971 if (TARGET_INT_SIMD)
19972 snprintf (instr, sizeof (instr),
19973 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19974 else
19975 snprintf (instr, sizeof (instr),
19976 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19977
19978 output_asm_insn (instr, & operand);
19979 if (TARGET_HARD_FLOAT)
19980 {
19981 /* Clear the cumulative exception-status bits (0-4,7) and the
19982 condition code bits (28-31) of the FPSCR. We need to
19983 remember to clear the first scratch register used (IP) and
19984 save and restore the second (r4). */
19985 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19986 output_asm_insn (instr, & operand);
19987 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19988 output_asm_insn (instr, & operand);
19989 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19990 output_asm_insn (instr, & operand);
19991 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19992 output_asm_insn (instr, & operand);
19993 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19994 output_asm_insn (instr, & operand);
19995 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19996 output_asm_insn (instr, & operand);
19997 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19998 output_asm_insn (instr, & operand);
19999 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
20000 output_asm_insn (instr, & operand);
20001 }
20002 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
20003 }
20004 /* Use bx if it's available. */
20005 else if (arm_arch5t || arm_arch4t)
20006 sprintf (instr, "bx%s\t%%|lr", conditional);
20007 else
20008 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
20009 break;
20010 }
20011
20012 output_asm_insn (instr, & operand);
20013 }
20014
20015 return "";
20016 }
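/* A rough illustration of the output above (assuming a function with no
   interworking, interrupt or CMSE requirements whose saved_regs_mask is
   {r4, lr}): the multi-register branch typically prints "pop {r4, pc}"
   and no separate return instruction is needed, since the return address
   goes straight into the PC.  When the return address must go via LR
   instead (e.g. ARM_FT_INTERWORKED), the pop targets LR and is followed
   by "bx lr".  */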
20017
20018 /* Output in FILE asm statements needed to declare the NAME of the function
20019 defined by its DECL node. */
20020
20021 void
20022 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
20023 {
20024 size_t cmse_name_len;
20025 char *cmse_name = 0;
20026 char cmse_prefix[] = "__acle_se_";
20027
20028 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20029 extra function label for each function with the 'cmse_nonsecure_entry'
20030 attribute. This extra function label should be prepended with
20031 '__acle_se_', telling the linker that it needs to create secure gateway
20032 veneers for this function. */
20033 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20034 DECL_ATTRIBUTES (decl)))
20035 {
20036 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20037 cmse_name = XALLOCAVEC (char, cmse_name_len);
20038 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20039 targetm.asm_out.globalize_label (file, cmse_name);
20040
20041 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20042 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20043 }
20044
20045 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20046 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20047 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20048 ASM_OUTPUT_LABEL (file, name);
20049
20050 if (cmse_name)
20051 ASM_OUTPUT_LABEL (file, cmse_name);
20052
20053 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20054 }
20055
20056 /* Write the function name into the code section, directly preceding
20057 the function prologue.
20058
20059 Code will be output similar to this:
20060 t0
20061 .ascii "arm_poke_function_name", 0
20062 .align
20063 t1
20064 .word 0xff000000 + (t1 - t0)
20065 arm_poke_function_name
20066 mov ip, sp
20067 stmfd sp!, {fp, ip, lr, pc}
20068 sub fp, ip, #4
20069
20070 When performing a stack backtrace, code can inspect the value
20071 of 'pc' stored at 'fp' + 0. If the trace function then looks
20072 at location pc - 12 and the top 8 bits are set, then we know
20073 that there is a function name embedded immediately preceding this
20074 location, and that it has length ((pc[-3]) & ~0xff000000).
20075
20076 We assume that pc is declared as a pointer to an unsigned long.
20077
20078 It is of no benefit to output the function name if we are assembling
20079 a leaf function. These function types will not contain a stack
20080 backtrace structure, therefore it is not possible to determine the
20081 function name. */
20082 void
20083 arm_poke_function_name (FILE *stream, const char *name)
20084 {
20085 unsigned long alignlength;
20086 unsigned long length;
20087 rtx x;
20088
20089 length = strlen (name) + 1;
20090 alignlength = ROUND_UP_WORD (length);
20091
20092 ASM_OUTPUT_ASCII (stream, name, length);
20093 ASM_OUTPUT_ALIGN (stream, 2);
20094 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20095 assemble_aligned_integer (UNITS_PER_WORD, x);
20096 }
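/* A hypothetical consumer of the convention documented above (a sketch,
   not code that exists in GCC): a backtracer could recover the name
   roughly like this, where pc is the saved program counter word taken
   from the frame and len_word, len and name are made-up local names:

       unsigned long len_word = ((unsigned long *) pc)[-3];
       if ((len_word & 0xff000000) == 0xff000000)
         {
           unsigned long len = len_word & ~0xff000000;
           const char *name = (const char *) pc - 12 - len;
           ...
         }

   Here len is the word-rounded length stored by arm_poke_function_name
   and name points at the NUL-terminated string emitted by
   ASM_OUTPUT_ASCII.  */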
20097
20098 /* Place some comments into the assembler stream
20099 describing the current function. */
20100 static void
20101 arm_output_function_prologue (FILE *f)
20102 {
20103 unsigned long func_type;
20104
20105 /* Sanity check. */
20106 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20107
20108 func_type = arm_current_func_type ();
20109
20110 switch ((int) ARM_FUNC_TYPE (func_type))
20111 {
20112 default:
20113 case ARM_FT_NORMAL:
20114 break;
20115 case ARM_FT_INTERWORKED:
20116 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20117 break;
20118 case ARM_FT_ISR:
20119 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20120 break;
20121 case ARM_FT_FIQ:
20122 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20123 break;
20124 case ARM_FT_EXCEPTION:
20125 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20126 break;
20127 }
20128
20129 if (IS_NAKED (func_type))
20130 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20131
20132 if (IS_VOLATILE (func_type))
20133 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20134
20135 if (IS_NESTED (func_type))
20136 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20137 if (IS_STACKALIGN (func_type))
20138 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20139 if (IS_CMSE_ENTRY (func_type))
20140 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20141
20142 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20143 (HOST_WIDE_INT) crtl->args.size,
20144 crtl->args.pretend_args_size,
20145 (HOST_WIDE_INT) get_frame_size ());
20146
20147 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20148 frame_pointer_needed,
20149 cfun->machine->uses_anonymous_args);
20150
20151 if (cfun->machine->lr_save_eliminated)
20152 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20153
20154 if (crtl->calls_eh_return)
20155 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20156
20157 }
20158
20159 static void
20160 arm_output_function_epilogue (FILE *)
20161 {
20162 arm_stack_offsets *offsets;
20163
20164 if (TARGET_THUMB1)
20165 {
20166 int regno;
20167
20168 /* Emit any call-via-reg trampolines that are needed for v4t support
20169 of call_reg and call_value_reg type insns. */
20170 for (regno = 0; regno < LR_REGNUM; regno++)
20171 {
20172 rtx label = cfun->machine->call_via[regno];
20173
20174 if (label != NULL)
20175 {
20176 switch_to_section (function_section (current_function_decl));
20177 targetm.asm_out.internal_label (asm_out_file, "L",
20178 CODE_LABEL_NUMBER (label));
20179 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20180 }
20181 }
20182
20183 /* ??? Probably not safe to set this here, since it assumes that a
20184 function will be emitted as assembly immediately after we generate
20185 RTL for it. This does not happen for inline functions. */
20186 cfun->machine->return_used_this_function = 0;
20187 }
20188 else /* TARGET_32BIT */
20189 {
20190 /* We need to take into account any stack-frame rounding. */
20191 offsets = arm_get_frame_offsets ();
20192
20193 gcc_assert (!use_return_insn (FALSE, NULL)
20194 || (cfun->machine->return_used_this_function != 0)
20195 || offsets->saved_regs == offsets->outgoing_args
20196 || frame_pointer_needed);
20197 }
20198 }
20199
20200 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20201 STR and STRD. If an even number of registers is being pushed, one
20202 STRD pattern is created for each register pair. If an odd
20203 number of registers is pushed, emit an initial STR followed by
20204 as many STRD instructions as are needed. This works best when the
20205 stack is initially 64-bit aligned (the normal case), since it
20206 ensures that each STRD is also 64-bit aligned. */
20207 static void
20208 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20209 {
20210 int num_regs = 0;
20211 int i;
20212 int regno;
20213 rtx par = NULL_RTX;
20214 rtx dwarf = NULL_RTX;
20215 rtx tmp;
20216 bool first = true;
20217
20218 num_regs = bit_count (saved_regs_mask);
20219
20220 /* Must be at least one register to save, and can't save SP or PC. */
20221 gcc_assert (num_regs > 0 && num_regs <= 14);
20222 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20223 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20224
20225 /* Create sequence for DWARF info. All the frame-related data for
20226 debugging is held in this wrapper. */
20227 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20228
20229 /* Describe the stack adjustment. */
20230 tmp = gen_rtx_SET (stack_pointer_rtx,
20231 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, 0) = tmp;
20234
20235 /* Find the first register. */
20236 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20237 ;
20238
20239 i = 0;
20240
20241 /* If there's an odd number of registers to push, start off by
20242 pushing a single register. This ensures that subsequent strd
20243 operations are dword aligned (assuming that SP was originally
20244 64-bit aligned). */
20245 if ((num_regs & 1) != 0)
20246 {
20247 rtx reg, mem, insn;
20248
20249 reg = gen_rtx_REG (SImode, regno);
20250 if (num_regs == 1)
20251 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20252 stack_pointer_rtx));
20253 else
20254 mem = gen_frame_mem (Pmode,
20255 gen_rtx_PRE_MODIFY
20256 (Pmode, stack_pointer_rtx,
20257 plus_constant (Pmode, stack_pointer_rtx,
20258 -4 * num_regs)));
20259
20260 tmp = gen_rtx_SET (mem, reg);
20261 RTX_FRAME_RELATED_P (tmp) = 1;
20262 insn = emit_insn (tmp);
20263 RTX_FRAME_RELATED_P (insn) = 1;
20264 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20265 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20266 RTX_FRAME_RELATED_P (tmp) = 1;
20267 i++;
20268 regno++;
20269 XVECEXP (dwarf, 0, i) = tmp;
20270 first = false;
20271 }
20272
20273 while (i < num_regs)
20274 if (saved_regs_mask & (1 << regno))
20275 {
20276 rtx reg1, reg2, mem1, mem2;
20277 rtx tmp0, tmp1, tmp2;
20278 int regno2;
20279
20280 /* Find the register to pair with this one. */
20281 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20282 regno2++)
20283 ;
20284
20285 reg1 = gen_rtx_REG (SImode, regno);
20286 reg2 = gen_rtx_REG (SImode, regno2);
20287
20288 if (first)
20289 {
20290 rtx insn;
20291
20292 first = false;
20293 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20294 stack_pointer_rtx,
20295 -4 * num_regs));
20296 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20297 stack_pointer_rtx,
20298 -4 * (num_regs - 1)));
20299 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20300 plus_constant (Pmode, stack_pointer_rtx,
20301 -4 * (num_regs)));
20302 tmp1 = gen_rtx_SET (mem1, reg1);
20303 tmp2 = gen_rtx_SET (mem2, reg2);
20304 RTX_FRAME_RELATED_P (tmp0) = 1;
20305 RTX_FRAME_RELATED_P (tmp1) = 1;
20306 RTX_FRAME_RELATED_P (tmp2) = 1;
20307 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20308 XVECEXP (par, 0, 0) = tmp0;
20309 XVECEXP (par, 0, 1) = tmp1;
20310 XVECEXP (par, 0, 2) = tmp2;
20311 insn = emit_insn (par);
20312 RTX_FRAME_RELATED_P (insn) = 1;
20313 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20314 }
20315 else
20316 {
20317 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 4 * i));
20320 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20321 stack_pointer_rtx,
20322 4 * (i + 1)));
20323 tmp1 = gen_rtx_SET (mem1, reg1);
20324 tmp2 = gen_rtx_SET (mem2, reg2);
20325 RTX_FRAME_RELATED_P (tmp1) = 1;
20326 RTX_FRAME_RELATED_P (tmp2) = 1;
20327 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20328 XVECEXP (par, 0, 0) = tmp1;
20329 XVECEXP (par, 0, 1) = tmp2;
20330 emit_insn (par);
20331 }
20332
20333 /* Create unwind information. This is an approximation. */
20334 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20335 plus_constant (Pmode,
20336 stack_pointer_rtx,
20337 4 * i)),
20338 reg1);
20339 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20340 plus_constant (Pmode,
20341 stack_pointer_rtx,
20342 4 * (i + 1))),
20343 reg2);
20344
20345 RTX_FRAME_RELATED_P (tmp1) = 1;
20346 RTX_FRAME_RELATED_P (tmp2) = 1;
20347 XVECEXP (dwarf, 0, i + 1) = tmp1;
20348 XVECEXP (dwarf, 0, i + 2) = tmp2;
20349 i += 2;
20350 regno = regno2 + 1;
20351 }
20352 else
20353 regno++;
20354
20355 return;
20356 }
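/* A rough illustration of the sequence above (assuming SP starts 64-bit
   aligned): for saved_regs_mask {r4, r5, r6} the emitted sets correspond
   approximately to

       str    r4, [sp, #-12]!
       strd   r5, r6, [sp, #4]

   while for an even mask such as {r4, r5, r6, r7} the first pair carries
   the writeback:

       strd   r4, r5, [sp, #-16]!
       strd   r6, r7, [sp, #8]  */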
20357
20358 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20359 whenever possible, otherwise it emits single-word stores. The first store
20360 also allocates stack space for all saved registers, using pre-indexed
20361 addressing with writeback. All other stores use offset addressing. If no STRD
20362 can be emitted, this function emits a sequence of single-word stores,
20363 and not an STM as before, because single-word stores provide more
20364 scheduling freedom and can be turned into an STM by peephole optimizations. */
20365 static void
20366 arm_emit_strd_push (unsigned long saved_regs_mask)
20367 {
20368 int num_regs = 0;
20369 int i, j, dwarf_index = 0;
20370 int offset = 0;
20371 rtx dwarf = NULL_RTX;
20372 rtx insn = NULL_RTX;
20373 rtx tmp, mem;
20374
20375 /* TODO: More efficient code can be emitted by changing the
20376 layout, e.g., first push all pairs that can use STRD to keep the
20377 stack aligned, and then push all other registers. */
20378 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20379 if (saved_regs_mask & (1 << i))
20380 num_regs++;
20381
20382 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20383 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20384 gcc_assert (num_regs > 0);
20385
20386 /* Create sequence for DWARF info. */
20387 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20388
20389 /* For dwarf info, we generate explicit stack update. */
20390 tmp = gen_rtx_SET (stack_pointer_rtx,
20391 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20392 RTX_FRAME_RELATED_P (tmp) = 1;
20393 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20394
20395 /* Save registers. */
20396 offset = - 4 * num_regs;
20397 j = 0;
20398 while (j <= LAST_ARM_REGNUM)
20399 if (saved_regs_mask & (1 << j))
20400 {
20401 if ((j % 2 == 0)
20402 && (saved_regs_mask & (1 << (j + 1))))
20403 {
20404 /* The current register and the next register form a register pair
20405 for which STRD can be generated. */
20406 if (offset < 0)
20407 {
20408 /* Allocate stack space for all saved registers. */
20409 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20410 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20411 mem = gen_frame_mem (DImode, tmp);
20412 offset = 0;
20413 }
20414 else if (offset > 0)
20415 mem = gen_frame_mem (DImode,
20416 plus_constant (Pmode,
20417 stack_pointer_rtx,
20418 offset));
20419 else
20420 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20421
20422 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20423 RTX_FRAME_RELATED_P (tmp) = 1;
20424 tmp = emit_insn (tmp);
20425
20426 /* Record the first store insn. */
20427 if (dwarf_index == 1)
20428 insn = tmp;
20429
20430 /* Generate dwarf info. */
20431 mem = gen_frame_mem (SImode,
20432 plus_constant (Pmode,
20433 stack_pointer_rtx,
20434 offset));
20435 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20436 RTX_FRAME_RELATED_P (tmp) = 1;
20437 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20438
20439 mem = gen_frame_mem (SImode,
20440 plus_constant (Pmode,
20441 stack_pointer_rtx,
20442 offset + 4));
20443 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20444 RTX_FRAME_RELATED_P (tmp) = 1;
20445 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20446
20447 offset += 8;
20448 j += 2;
20449 }
20450 else
20451 {
20452 /* Emit a single word store. */
20453 if (offset < 0)
20454 {
20455 /* Allocate stack space for all saved registers. */
20456 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20457 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20458 mem = gen_frame_mem (SImode, tmp);
20459 offset = 0;
20460 }
20461 else if (offset > 0)
20462 mem = gen_frame_mem (SImode,
20463 plus_constant (Pmode,
20464 stack_pointer_rtx,
20465 offset));
20466 else
20467 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20468
20469 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20470 RTX_FRAME_RELATED_P (tmp) = 1;
20471 tmp = emit_insn (tmp);
20472
20473 /* Record the first store insn. */
20474 if (dwarf_index == 1)
20475 insn = tmp;
20476
20477 /* Generate dwarf info. */
20478 mem = gen_frame_mem (SImode,
20479 plus_constant (Pmode,
20480 stack_pointer_rtx,
20481 offset));
20482 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20483 RTX_FRAME_RELATED_P (tmp) = 1;
20484 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20485
20486 offset += 4;
20487 j += 1;
20488 }
20489 }
20490 else
20491 j++;
20492
20493 /* Attach dwarf info to the first insn we generate. */
20494 gcc_assert (insn != NULL_RTX);
20495 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20496 RTX_FRAME_RELATED_P (insn) = 1;
20497 }
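/* A rough illustration of the stores above: for saved_regs_mask
   {r4, r5, r6, lr} the emitted sets correspond approximately to

       strd   r4, r5, [sp, #-16]!
       str    r6, [sp, #8]
       str    lr, [sp, #12]

   r6 gets a single-word store because r7 is not in the mask, and lr gets
   one because the following register (PC) is never pushed here.  */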
20498
20499 /* Generate and emit an insn that we will recognize as a push_multi.
20500 Unfortunately, since this insn does not reflect very well the actual
20501 semantics of the operation, we need to annotate the insn for the benefit
20502 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20503 MASK for registers that should be annotated for DWARF2 frame unwind
20504 information. */
20505 static rtx
20506 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20507 {
20508 int num_regs = 0;
20509 int num_dwarf_regs = 0;
20510 int i, j;
20511 rtx par;
20512 rtx dwarf;
20513 int dwarf_par_index;
20514 rtx tmp, reg;
20515
20516 /* We don't record the PC in the dwarf frame information. */
20517 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20518
20519 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20520 {
20521 if (mask & (1 << i))
20522 num_regs++;
20523 if (dwarf_regs_mask & (1 << i))
20524 num_dwarf_regs++;
20525 }
20526
20527 gcc_assert (num_regs && num_regs <= 16);
20528 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20529
20530 /* For the body of the insn we are going to generate an UNSPEC in
20531 parallel with several USEs. This allows the insn to be recognized
20532 by the push_multi pattern in the arm.md file.
20533
20534 The body of the insn looks something like this:
20535
20536 (parallel [
20537 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20538 (const_int:SI <num>)))
20539 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20540 (use (reg:SI XX))
20541 (use (reg:SI YY))
20542 ...
20543 ])
20544
20545 For the frame note however, we try to be more explicit and actually
20546 show each register being stored into the stack frame, plus a (single)
20547 decrement of the stack pointer. We do it this way in order to be
20548 friendly to the stack unwinding code, which only wants to see a single
20549 stack decrement per instruction. The RTL we generate for the note looks
20550 something like this:
20551
20552 (sequence [
20553 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20554 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20555 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20556 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20557 ...
20558 ])
20559
20560 FIXME: In an ideal world the PRE_MODIFY would not exist and
20561 instead we'd have a parallel expression detailing all
20562 the stores to the various memory addresses so that debug
20563 information is more up-to-date. Remember however while writing
20564 this to take care of the constraints with the push instruction.
20565
20566 Note also that this has to be taken care of for the VFP registers.
20567
20568 For more see PR43399. */
20569
20570 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20571 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20572 dwarf_par_index = 1;
20573
20574 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20575 {
20576 if (mask & (1 << i))
20577 {
20578 reg = gen_rtx_REG (SImode, i);
20579
20580 XVECEXP (par, 0, 0)
20581 = gen_rtx_SET (gen_frame_mem
20582 (BLKmode,
20583 gen_rtx_PRE_MODIFY (Pmode,
20584 stack_pointer_rtx,
20585 plus_constant
20586 (Pmode, stack_pointer_rtx,
20587 -4 * num_regs))
20588 ),
20589 gen_rtx_UNSPEC (BLKmode,
20590 gen_rtvec (1, reg),
20591 UNSPEC_PUSH_MULT));
20592
20593 if (dwarf_regs_mask & (1 << i))
20594 {
20595 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20596 reg);
20597 RTX_FRAME_RELATED_P (tmp) = 1;
20598 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20599 }
20600
20601 break;
20602 }
20603 }
20604
20605 for (j = 1, i++; j < num_regs; i++)
20606 {
20607 if (mask & (1 << i))
20608 {
20609 reg = gen_rtx_REG (SImode, i);
20610
20611 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20612
20613 if (dwarf_regs_mask & (1 << i))
20614 {
20615 tmp
20616 = gen_rtx_SET (gen_frame_mem
20617 (SImode,
20618 plus_constant (Pmode, stack_pointer_rtx,
20619 4 * j)),
20620 reg);
20621 RTX_FRAME_RELATED_P (tmp) = 1;
20622 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20623 }
20624
20625 j++;
20626 }
20627 }
20628
20629 par = emit_insn (par);
20630
20631 tmp = gen_rtx_SET (stack_pointer_rtx,
20632 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20633 RTX_FRAME_RELATED_P (tmp) = 1;
20634 XVECEXP (dwarf, 0, 0) = tmp;
20635
20636 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20637
20638 return par;
20639 }
20640
20641 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20642 SIZE is the offset to be adjusted.
20643 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20644 static void
20645 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20646 {
20647 rtx dwarf;
20648
20649 RTX_FRAME_RELATED_P (insn) = 1;
20650 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20651 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20652 }
20653
20654 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20655 SAVED_REGS_MASK shows which registers need to be restored.
20656
20657 Unfortunately, since this insn does not reflect very well the actual
20658 semantics of the operation, we need to annotate the insn for the benefit
20659 of DWARF2 frame unwind information. */
20660 static void
20661 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20662 {
20663 int num_regs = 0;
20664 int i, j;
20665 rtx par;
20666 rtx dwarf = NULL_RTX;
20667 rtx tmp, reg;
20668 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20669 int offset_adj;
20670 int emit_update;
20671
20672 offset_adj = return_in_pc ? 1 : 0;
20673 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20674 if (saved_regs_mask & (1 << i))
20675 num_regs++;
20676
20677 gcc_assert (num_regs && num_regs <= 16);
20678
20679 /* If SP is in reglist, then we don't emit SP update insn. */
20680 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20681
20682 /* The parallel needs to hold num_regs SETs
20683 and one SET for the stack update. */
20684 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20685
20686 if (return_in_pc)
20687 XVECEXP (par, 0, 0) = ret_rtx;
20688
20689 if (emit_update)
20690 {
20691 /* Increment the stack pointer, based on there being
20692 num_regs 4-byte registers to restore. */
20693 tmp = gen_rtx_SET (stack_pointer_rtx,
20694 plus_constant (Pmode,
20695 stack_pointer_rtx,
20696 4 * num_regs));
20697 RTX_FRAME_RELATED_P (tmp) = 1;
20698 XVECEXP (par, 0, offset_adj) = tmp;
20699 }
20700
20701 /* Now restore every reg, which may include PC. */
20702 for (j = 0, i = 0; j < num_regs; i++)
20703 if (saved_regs_mask & (1 << i))
20704 {
20705 reg = gen_rtx_REG (SImode, i);
20706 if ((num_regs == 1) && emit_update && !return_in_pc)
20707 {
20708 /* Emit single load with writeback. */
20709 tmp = gen_frame_mem (SImode,
20710 gen_rtx_POST_INC (Pmode,
20711 stack_pointer_rtx));
20712 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20713 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20714 return;
20715 }
20716
20717 tmp = gen_rtx_SET (reg,
20718 gen_frame_mem
20719 (SImode,
20720 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20721 RTX_FRAME_RELATED_P (tmp) = 1;
20722 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20723
20724 /* We need to maintain a sequence for DWARF info too. As dwarf info
20725 should not have PC, skip PC. */
20726 if (i != PC_REGNUM)
20727 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20728
20729 j++;
20730 }
20731
20732 if (return_in_pc)
20733 par = emit_jump_insn (par);
20734 else
20735 par = emit_insn (par);
20736
20737 REG_NOTES (par) = dwarf;
20738 if (!return_in_pc)
20739 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20740 stack_pointer_rtx, stack_pointer_rtx);
20741 }
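/* A rough illustration of the pattern built above: for saved_regs_mask
   {r4, r5, pc} the PARALLEL holds a return, the update sp = sp + 12, and
   loads of r4, r5 and pc from [sp], [sp, #4] and [sp, #8]; it is normally
   recognized as a pop_multiple_with_stack_update_and_return pattern
   (mentioned further below) and printed as a single pop of {r4, r5, pc}.  */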
20742
20743 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20744 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20745
20746 Unfortunately, since this insn does not reflect very well the actual
20747 semantics of the operation, we need to annotate the insn for the benefit
20748 of DWARF2 frame unwind information. */
20749 static void
20750 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20751 {
20752 int i, j;
20753 rtx par;
20754 rtx dwarf = NULL_RTX;
20755 rtx tmp, reg;
20756
20757 gcc_assert (num_regs && num_regs <= 32);
20758
20759 /* Workaround ARM10 VFPr1 bug. */
20760 if (num_regs == 2 && !arm_arch6)
20761 {
20762 if (first_reg == 15)
20763 first_reg--;
20764
20765 num_regs++;
20766 }
20767
20768 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20769 there could be up to 32 D-registers to restore.
20770 If there are more than 16 D-registers, make two recursive calls,
20771 each of which emits one pop_multi instruction. */
20772 if (num_regs > 16)
20773 {
20774 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20775 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20776 return;
20777 }
20778
20779 /* The parallel needs to hold num_regs SETs
20780 and one SET for the stack update. */
20781 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20782
20783 /* Increment the stack pointer, based on there being
20784 num_regs 8-byte registers to restore. */
20785 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20786 RTX_FRAME_RELATED_P (tmp) = 1;
20787 XVECEXP (par, 0, 0) = tmp;
20788
20789 /* Now show every reg that will be restored, using a SET for each. */
20790 for (j = 0, i=first_reg; j < num_regs; i += 2)
20791 {
20792 reg = gen_rtx_REG (DFmode, i);
20793
20794 tmp = gen_rtx_SET (reg,
20795 gen_frame_mem
20796 (DFmode,
20797 plus_constant (Pmode, base_reg, 8 * j)));
20798 RTX_FRAME_RELATED_P (tmp) = 1;
20799 XVECEXP (par, 0, j + 1) = tmp;
20800
20801 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20802
20803 j++;
20804 }
20805
20806 par = emit_insn (par);
20807 REG_NOTES (par) = dwarf;
20808
20809 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20810 if (REGNO (base_reg) == IP_REGNUM)
20811 {
20812 RTX_FRAME_RELATED_P (par) = 1;
20813 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20814 }
20815 else
20816 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20817 base_reg, base_reg);
20818 }
20819
20820 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20821 even number of registers is being popped, multiple LDRD patterns are created,
20822 one for each register pair. If an odd number of registers is popped, the last
20823 register is loaded using an LDR pattern. */
20824 static void
20825 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20826 {
20827 int num_regs = 0;
20828 int i, j;
20829 rtx par = NULL_RTX;
20830 rtx dwarf = NULL_RTX;
20831 rtx tmp, reg, tmp1;
20832 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20833
20834 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20835 if (saved_regs_mask & (1 << i))
20836 num_regs++;
20837
20838 gcc_assert (num_regs && num_regs <= 16);
20839
20840 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20841 to be popped. Thus, if num_regs was even it now becomes odd and we
20842 can generate a pop with PC; if num_regs was odd it becomes even and
20843 an LDR with return can be generated for PC. */
20844 if (return_in_pc)
20845 num_regs--;
20846
20847 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20848
20849 /* Var j iterates over all the registers to gather all the registers in
20850 saved_regs_mask. Var i gives the index of a saved register in the stack
20851 frame. A PARALLEL RTX of a register pair is created here, so that the
20852 pattern for LDRD can be matched. As PC is always the last register to be
20853 popped, and num_regs has already been decremented if PC is to be popped,
20854 we don't have to worry about PC in this loop. */
20855 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20856 if (saved_regs_mask & (1 << j))
20857 {
20858 /* Create RTX for memory load. */
20859 reg = gen_rtx_REG (SImode, j);
20860 tmp = gen_rtx_SET (reg,
20861 gen_frame_mem (SImode,
20862 plus_constant (Pmode,
20863 stack_pointer_rtx, 4 * i)));
20864 RTX_FRAME_RELATED_P (tmp) = 1;
20865
20866 if (i % 2 == 0)
20867 {
20868 /* When saved-register index (i) is even, the RTX to be emitted is
20869 yet to be created. Hence create it first. The LDRD pattern we
20870 are generating is:
20871 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20872 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20873 where target registers need not be consecutive. */
20874 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20875 dwarf = NULL_RTX;
20876 }
20877
20878 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20879 added as 0th element and if i is odd, reg_i is added as 1st element
20880 of LDRD pattern shown above. */
20881 XVECEXP (par, 0, (i % 2)) = tmp;
20882 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20883
20884 if ((i % 2) == 1)
20885 {
20886 /* When saved-register index (i) is odd, RTXs for both the registers
20887 to be loaded are generated in above given LDRD pattern, and the
20888 pattern can be emitted now. */
20889 par = emit_insn (par);
20890 REG_NOTES (par) = dwarf;
20891 RTX_FRAME_RELATED_P (par) = 1;
20892 }
20893
20894 i++;
20895 }
20896
20897 /* If the number of registers pushed is odd and return_in_pc is false, or
20898 the number of registers is even and return_in_pc is true, the last
20899 register is popped using LDR. It can be PC as well. Hence, adjust the
20900 stack first and then do the LDR with post-increment. */
20901
20902 /* Increment the stack pointer, based on there being
20903 num_regs 4-byte registers to restore. */
20904 tmp = gen_rtx_SET (stack_pointer_rtx,
20905 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20906 RTX_FRAME_RELATED_P (tmp) = 1;
20907 tmp = emit_insn (tmp);
20908 if (!return_in_pc)
20909 {
20910 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20911 stack_pointer_rtx, stack_pointer_rtx);
20912 }
20913
20914 dwarf = NULL_RTX;
20915
20916 if (((num_regs % 2) == 1 && !return_in_pc)
20917 || ((num_regs % 2) == 0 && return_in_pc))
20918 {
20919 /* Scan for the single register to be popped. Skip until the saved
20920 register is found. */
20921 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20922
20923 /* Gen LDR with post increment here. */
20924 tmp1 = gen_rtx_MEM (SImode,
20925 gen_rtx_POST_INC (SImode,
20926 stack_pointer_rtx));
20927 set_mem_alias_set (tmp1, get_frame_alias_set ());
20928
20929 reg = gen_rtx_REG (SImode, j);
20930 tmp = gen_rtx_SET (reg, tmp1);
20931 RTX_FRAME_RELATED_P (tmp) = 1;
20932 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20933
20934 if (return_in_pc)
20935 {
20936 /* If return_in_pc, j must be PC_REGNUM. */
20937 gcc_assert (j == PC_REGNUM);
20938 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20939 XVECEXP (par, 0, 0) = ret_rtx;
20940 XVECEXP (par, 0, 1) = tmp;
20941 par = emit_jump_insn (par);
20942 }
20943 else
20944 {
20945 par = emit_insn (tmp);
20946 REG_NOTES (par) = dwarf;
20947 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20948 stack_pointer_rtx, stack_pointer_rtx);
20949 }
20950
20951 }
20952 else if ((num_regs % 2) == 1 && return_in_pc)
20953 {
20954 /* There are 2 registers to be popped. So, generate the pattern
20955 pop_multiple_with_stack_update_and_return to pop in PC. */
20956 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20957 }
20958
20959 return;
20960 }
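/* A rough illustration of the sequence above: for saved_regs_mask
   {r4, r5, r6, pc}, num_regs drops from 4 to 3 because of PC, and the
   emitted RTL corresponds approximately to

       ldrd   r4, r5, [sp]
       add    sp, sp, #8
       pop    {r6, pc}

   where the final pop comes from the call to arm_emit_multi_reg_pop.  */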
20961
20962 /* LDRD in ARM mode needs consecutive registers as operands. This function
20963 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20964 offset addressing and then generates one separate stack update. This provides
20965 more scheduling freedom, compared to writeback on every load. However,
20966 if the function returns using load into PC directly
20967 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20968 before the last load. TODO: Add a peephole optimization to recognize
20969 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20970 peephole optimization to merge the load at stack-offset zero
20971 with the stack update instruction using load with writeback
20972 in post-index addressing mode. */
20973 static void
20974 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20975 {
20976 int j = 0;
20977 int offset = 0;
20978 rtx par = NULL_RTX;
20979 rtx dwarf = NULL_RTX;
20980 rtx tmp, mem;
20981
20982 /* Restore saved registers. */
20983 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20984 j = 0;
20985 while (j <= LAST_ARM_REGNUM)
20986 if (saved_regs_mask & (1 << j))
20987 {
20988 if ((j % 2) == 0
20989 && (saved_regs_mask & (1 << (j + 1)))
20990 && (j + 1) != PC_REGNUM)
20991 {
20992 /* Current register and next register form register pair for which
20993 LDRD can be generated. PC is always the last register popped, and
20994 we handle it separately. */
20995 if (offset > 0)
20996 mem = gen_frame_mem (DImode,
20997 plus_constant (Pmode,
20998 stack_pointer_rtx,
20999 offset));
21000 else
21001 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21002
21003 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
21004 tmp = emit_insn (tmp);
21005 RTX_FRAME_RELATED_P (tmp) = 1;
21006
21007 /* Generate dwarf info. */
21008
21009 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21010 gen_rtx_REG (SImode, j),
21011 NULL_RTX);
21012 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21013 gen_rtx_REG (SImode, j + 1),
21014 dwarf);
21015
21016 REG_NOTES (tmp) = dwarf;
21017
21018 offset += 8;
21019 j += 2;
21020 }
21021 else if (j != PC_REGNUM)
21022 {
21023 /* Emit a single word load. */
21024 if (offset > 0)
21025 mem = gen_frame_mem (SImode,
21026 plus_constant (Pmode,
21027 stack_pointer_rtx,
21028 offset));
21029 else
21030 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21031
21032 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21033 tmp = emit_insn (tmp);
21034 RTX_FRAME_RELATED_P (tmp) = 1;
21035
21036 /* Generate dwarf info. */
21037 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21038 gen_rtx_REG (SImode, j),
21039 NULL_RTX);
21040
21041 offset += 4;
21042 j += 1;
21043 }
21044 else /* j == PC_REGNUM */
21045 j++;
21046 }
21047 else
21048 j++;
21049
21050 /* Update the stack. */
21051 if (offset > 0)
21052 {
21053 tmp = gen_rtx_SET (stack_pointer_rtx,
21054 plus_constant (Pmode,
21055 stack_pointer_rtx,
21056 offset));
21057 tmp = emit_insn (tmp);
21058 arm_add_cfa_adjust_cfa_note (tmp, offset,
21059 stack_pointer_rtx, stack_pointer_rtx);
21060 offset = 0;
21061 }
21062
21063 if (saved_regs_mask & (1 << PC_REGNUM))
21064 {
21065 /* Only PC is to be popped. */
21066 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21067 XVECEXP (par, 0, 0) = ret_rtx;
21068 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21069 gen_frame_mem (SImode,
21070 gen_rtx_POST_INC (SImode,
21071 stack_pointer_rtx)));
21072 RTX_FRAME_RELATED_P (tmp) = 1;
21073 XVECEXP (par, 0, 1) = tmp;
21074 par = emit_jump_insn (par);
21075
21076 /* Generate dwarf info. */
21077 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21078 gen_rtx_REG (SImode, PC_REGNUM),
21079 NULL_RTX);
21080 REG_NOTES (par) = dwarf;
21081 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21082 stack_pointer_rtx, stack_pointer_rtx);
21083 }
21084 }
21085
21086 /* Calculate the size of the return value that is passed in registers. */
21087 static unsigned
21088 arm_size_return_regs (void)
21089 {
21090 machine_mode mode;
21091
21092 if (crtl->return_rtx != 0)
21093 mode = GET_MODE (crtl->return_rtx);
21094 else
21095 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21096
21097 return GET_MODE_SIZE (mode);
21098 }
21099
21100 /* Return true if the current function needs to save/restore LR. */
21101 static bool
21102 thumb_force_lr_save (void)
21103 {
21104 return !cfun->machine->lr_save_eliminated
21105 && (!crtl->is_leaf
21106 || thumb_far_jump_used_p ()
21107 || df_regs_ever_live_p (LR_REGNUM));
21108 }
21109
21110 /* Return true if CALL is an indirect tail call. We do not know
21111 whether r3 will be available when an indirect tail call happens in
21112 this particular case. */
21113 static bool
21114 is_indirect_tailcall_p (rtx call)
21115 {
21116 rtx pat = PATTERN (call);
21117
21118 /* Indirect tail call. */
21119 pat = XVECEXP (pat, 0, 0);
21120 if (GET_CODE (pat) == SET)
21121 pat = SET_SRC (pat);
21122
21123 pat = XEXP (XEXP (pat, 0), 0);
21124 return REG_P (pat);
21125 }
21126
21127 /* Return true if r3 is used by any of the tail call insns in the
21128 current function. */
21129 static bool
21130 any_sibcall_could_use_r3 (void)
21131 {
21132 edge_iterator ei;
21133 edge e;
21134
21135 if (!crtl->tail_call_emit)
21136 return false;
21137 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21138 if (e->flags & EDGE_SIBCALL)
21139 {
21140 rtx_insn *call = BB_END (e->src);
21141 if (!CALL_P (call))
21142 call = prev_nonnote_nondebug_insn (call);
21143 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21144 if (find_regno_fusage (call, USE, 3)
21145 || is_indirect_tailcall_p (call))
21146 return true;
21147 }
21148 return false;
21149 }
21150
21151
21152 /* Compute the distance from register FROM to register TO.
21153 These can be the arg pointer (26), the soft frame pointer (25),
21154 the stack pointer (13) or the hard frame pointer (11).
21155 In thumb mode r7 is used as the hard frame pointer, if needed.
21156 Typical stack layout looks like this:
21157
21158     old stack pointer -> |    |
21159                            ----
21160                           |    | \
21161                           |    |   saved arguments for
21162                           |    |   vararg functions
21163                           |    | /
21164                             --
21165 hard FP & arg pointer -> |    | \
21166                           |    |   stack
21167                           |    |   frame
21168                           |    | /
21169                             --
21170                           |    | \
21171                           |    |   call saved
21172                           |    |   registers
21173    soft frame pointer -> |    | /
21174                             --
21175                           |    | \
21176                           |    |   local
21177                           |    |   variables
21178   locals base pointer -> |    | /
21179                             --
21180                           |    | \
21181                           |    |   outgoing
21182                           |    |   arguments
21183 current stack pointer -> |    | /
21184                             --
21185
21186 For a given function some or all of these stack components
21187 may not be needed, giving rise to the possibility of
21188 eliminating some of the registers.
21189
21190 The values returned by this function must reflect the behavior
21191 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21192
21193 The sign of the number returned reflects the direction of stack
21194 growth, so the values are positive for all eliminations except
21195 from the soft frame pointer to the hard frame pointer.
21196
21197 SFP may point just inside the local variables block to ensure correct
21198 alignment. */
21199
21200
21201 /* Return cached stack offsets. */
21202
21203 static arm_stack_offsets *
21204 arm_get_frame_offsets (void)
21205 {
21206 struct arm_stack_offsets *offsets;
21207
21208 offsets = &cfun->machine->stack_offsets;
21209
21210 return offsets;
21211 }
21212
21213
21214 /* Calculate stack offsets. These are used to calculate register elimination
21215 offsets and in prologue/epilogue code. Also calculates which registers
21216 should be saved. */
21217
21218 static void
21219 arm_compute_frame_layout (void)
21220 {
21221 struct arm_stack_offsets *offsets;
21222 unsigned long func_type;
21223 int saved;
21224 int core_saved;
21225 HOST_WIDE_INT frame_size;
21226 int i;
21227
21228 offsets = &cfun->machine->stack_offsets;
21229
21230 /* Initially this is the size of the local variables. It will be translated
21231 into an offset once we have determined the size of preceding data. */
21232 frame_size = ROUND_UP_WORD (get_frame_size ());
21233
21234 /* Space for variadic functions. */
21235 offsets->saved_args = crtl->args.pretend_args_size;
21236
21237 /* In Thumb mode this is incorrect, but never used. */
21238 offsets->frame
21239 = (offsets->saved_args
21240 + arm_compute_static_chain_stack_bytes ()
21241 + (frame_pointer_needed ? 4 : 0));
21242
21243 if (TARGET_32BIT)
21244 {
21245 unsigned int regno;
21246
21247 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21248 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21249 saved = core_saved;
21250
21251 /* We know that SP will be doubleword aligned on entry, and we must
21252 preserve that condition at any subroutine call. We also require the
21253 soft frame pointer to be doubleword aligned. */
21254
21255 if (TARGET_REALLY_IWMMXT)
21256 {
21257 /* Check for the call-saved iWMMXt registers. */
21258 for (regno = FIRST_IWMMXT_REGNUM;
21259 regno <= LAST_IWMMXT_REGNUM;
21260 regno++)
21261 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21262 saved += 8;
21263 }
21264
21265 func_type = arm_current_func_type ();
21266 /* Space for saved VFP registers. */
21267 if (! IS_VOLATILE (func_type)
21268 && TARGET_HARD_FLOAT)
21269 saved += arm_get_vfp_saved_size ();
21270 }
21271 else /* TARGET_THUMB1 */
21272 {
21273 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21274 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21275 saved = core_saved;
21276 if (TARGET_BACKTRACE)
21277 saved += 16;
21278 }
21279
21280 /* Saved registers include the stack frame. */
21281 offsets->saved_regs
21282 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21283 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21284
21285 /* A leaf function does not need any stack alignment if it has nothing
21286 on the stack. */
21287 if (crtl->is_leaf && frame_size == 0
21288 /* However if it calls alloca(), we have a dynamically allocated
21289 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21290 && ! cfun->calls_alloca)
21291 {
21292 offsets->outgoing_args = offsets->soft_frame;
21293 offsets->locals_base = offsets->soft_frame;
21294 return;
21295 }
21296
21297 /* Ensure SFP has the correct alignment. */
21298 if (ARM_DOUBLEWORD_ALIGN
21299 && (offsets->soft_frame & 7))
21300 {
21301 offsets->soft_frame += 4;
21302 /* Try to align stack by pushing an extra reg. Don't bother doing this
21303 when there is a stack frame as the alignment will be rolled into
21304 the normal stack adjustment. */
21305 if (frame_size + crtl->outgoing_args_size == 0)
21306 {
21307 int reg = -1;
21308
21309 /* Register r3 is caller-saved. Normally it does not need to be
21310 saved on entry by the prologue. However if we choose to save
21311 it for padding then we may confuse the compiler into thinking
21312 a prologue sequence is required when in fact it is not. This
21313 will occur when shrink-wrapping if r3 is used as a scratch
21314 register and there are no other callee-saved writes.
21315
21316 This situation can be avoided when other callee-saved registers
21317 are available and r3 is not mandatory if we choose a callee-saved
21318 register for padding. */
21319 bool prefer_callee_reg_p = false;
21320
21321 /* If it is safe to use r3, then do so. This sometimes
21322 generates better code on Thumb-2 by avoiding the need to
21323 use 32-bit push/pop instructions. */
21324 if (! any_sibcall_could_use_r3 ()
21325 && arm_size_return_regs () <= 12
21326 && (offsets->saved_regs_mask & (1 << 3)) == 0
21327 && (TARGET_THUMB2
21328 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21329 {
21330 reg = 3;
21331 if (!TARGET_THUMB2)
21332 prefer_callee_reg_p = true;
21333 }
21334 if (reg == -1
21335 || prefer_callee_reg_p)
21336 {
21337 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21338 {
21339 /* Avoid fixed registers; they may be changed at
21340 arbitrary times so it's unsafe to restore them
21341 during the epilogue. */
21342 if (!fixed_regs[i]
21343 && (offsets->saved_regs_mask & (1 << i)) == 0)
21344 {
21345 reg = i;
21346 break;
21347 }
21348 }
21349 }
21350
21351 if (reg != -1)
21352 {
21353 offsets->saved_regs += 4;
21354 offsets->saved_regs_mask |= (1 << reg);
21355 }
21356 }
21357 }
21358
21359 offsets->locals_base = offsets->soft_frame + frame_size;
21360 offsets->outgoing_args = (offsets->locals_base
21361 + crtl->outgoing_args_size);
21362
21363 if (ARM_DOUBLEWORD_ALIGN)
21364 {
21365 /* Ensure SP remains doubleword aligned. */
21366 if (offsets->outgoing_args & 7)
21367 offsets->outgoing_args += 4;
21368 gcc_assert (!(offsets->outgoing_args & 7));
21369 }
21370 }
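/* A rough worked example of the layout above (an illustration only),
   assuming an ARM-mode function with ARM_DOUBLEWORD_ALIGN, no pretend
   args, no static chain, no frame pointer, no interworking slot, no
   iWMMXt or VFP saves, a core save mask of {r4, r5, r6, lr} (16 bytes),
   a 20-byte frame and 8 bytes of outgoing arguments:

       saved_args    = 0
       frame         = 0
       saved_regs    = 16
       soft_frame    = 16   (already doubleword aligned)
       locals_base   = 16 + 20 = 36
       outgoing_args = 36 + 8 = 44, padded to 48 to keep SP doubleword
                       aligned.  */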
21371
21372
21373 /* Calculate the relative offsets for the different stack pointers. Positive
21374 offsets are in the direction of stack growth. */
21375
21376 HOST_WIDE_INT
21377 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21378 {
21379 arm_stack_offsets *offsets;
21380
21381 offsets = arm_get_frame_offsets ();
21382
21383 /* OK, now we have enough information to compute the distances.
21384 There must be an entry in these switch tables for each pair
21385 of registers in ELIMINABLE_REGS, even if some of the entries
21386 seem to be redundant or useless. */
21387 switch (from)
21388 {
21389 case ARG_POINTER_REGNUM:
21390 switch (to)
21391 {
21392 case THUMB_HARD_FRAME_POINTER_REGNUM:
21393 return 0;
21394
21395 case FRAME_POINTER_REGNUM:
21396 /* This is the reverse of the soft frame pointer
21397 to hard frame pointer elimination below. */
21398 return offsets->soft_frame - offsets->saved_args;
21399
21400 case ARM_HARD_FRAME_POINTER_REGNUM:
21401 /* This is only non-zero in the case where the static chain register
21402 is stored above the frame. */
21403 return offsets->frame - offsets->saved_args - 4;
21404
21405 case STACK_POINTER_REGNUM:
21406 /* If nothing has been pushed on the stack at all
21407 then this will return -4. This *is* correct! */
21408 return offsets->outgoing_args - (offsets->saved_args + 4);
21409
21410 default:
21411 gcc_unreachable ();
21412 }
21413 gcc_unreachable ();
21414
21415 case FRAME_POINTER_REGNUM:
21416 switch (to)
21417 {
21418 case THUMB_HARD_FRAME_POINTER_REGNUM:
21419 return 0;
21420
21421 case ARM_HARD_FRAME_POINTER_REGNUM:
21422 /* The hard frame pointer points to the top entry in the
21423 stack frame. The soft frame pointer points to the bottom entry
21424 in the stack frame. If there is no stack frame at all,
21425 then they are identical. */
21426
21427 return offsets->frame - offsets->soft_frame;
21428
21429 case STACK_POINTER_REGNUM:
21430 return offsets->outgoing_args - offsets->soft_frame;
21431
21432 default:
21433 gcc_unreachable ();
21434 }
21435 gcc_unreachable ();
21436
21437 default:
21438 /* You cannot eliminate from the stack pointer.
21439 In theory you could eliminate from the hard frame
21440 pointer to the stack pointer, but this will never
21441 happen, since if a stack frame is not needed the
21442 hard frame pointer will never be used. */
21443 gcc_unreachable ();
21444 }
21445 }
21446
21447 /* Given FROM and TO register numbers, say whether this elimination is
21448 allowed. Frame pointer elimination is automatically handled.
21449
21450 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21451 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21452 pointer, we must eliminate FRAME_POINTER_REGNUM into
21453 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21454 ARG_POINTER_REGNUM. */
21455
21456 bool
21457 arm_can_eliminate (const int from, const int to)
21458 {
21459 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21460 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21461 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21462 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21463 true);
21464 }
21465
21466 /* Emit RTL to save coprocessor registers on function entry. Returns the
21467 number of bytes pushed. */
21468
21469 static int
21470 arm_save_coproc_regs (void)
21471 {
21472 int saved_size = 0;
21473 unsigned reg;
21474 unsigned start_reg;
21475 rtx insn;
21476
21477 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21478 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21479 {
21480 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21481 insn = gen_rtx_MEM (V2SImode, insn);
21482 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21483 RTX_FRAME_RELATED_P (insn) = 1;
21484 saved_size += 8;
21485 }
21486
21487 if (TARGET_HARD_FLOAT)
21488 {
21489 start_reg = FIRST_VFP_REGNUM;
21490
21491 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21492 {
21493 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21494 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21495 {
21496 if (start_reg != reg)
21497 saved_size += vfp_emit_fstmd (start_reg,
21498 (reg - start_reg) / 2);
21499 start_reg = reg + 2;
21500 }
21501 }
21502 if (start_reg != reg)
21503 saved_size += vfp_emit_fstmd (start_reg,
21504 (reg - start_reg) / 2);
21505 }
21506 return saved_size;
21507 }
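
/* Sketch of the grouping performed above (illustrative only): the VFP loop
   steps through the register file two single-precision registers at a time,
   i.e. one D register per iteration, and flushes each maximal run of live,
   call-saved registers with a single vfp_emit_fstmd call.  So if, say,
   d8-d11 are the only call-saved VFP registers that are live, one
   store-multiple covering those four double registers is emitted rather
   than four separate stores.  */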
21508
21509
21510 /* Set the Thumb frame pointer from the stack pointer. */
21511
21512 static void
21513 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21514 {
21515 HOST_WIDE_INT amount;
21516 rtx insn, dwarf;
21517
21518 amount = offsets->outgoing_args - offsets->locals_base;
21519 if (amount < 1024)
21520 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21521 stack_pointer_rtx, GEN_INT (amount)));
21522 else
21523 {
21524 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21525 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21526 expects the first two operands to be the same. */
21527 if (TARGET_THUMB2)
21528 {
21529 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21530 stack_pointer_rtx,
21531 hard_frame_pointer_rtx));
21532 }
21533 else
21534 {
21535 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21536 hard_frame_pointer_rtx,
21537 stack_pointer_rtx));
21538 }
21539 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21540 plus_constant (Pmode, stack_pointer_rtx, amount));
21541 RTX_FRAME_RELATED_P (dwarf) = 1;
21542 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21543 }
21544
21545 RTX_FRAME_RELATED_P (insn) = 1;
21546 }
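
/* A sketch of what the code above emits (register names illustrative; the
   Thumb hard frame pointer is normally r7):

	add	r7, sp, #amount

   for a small offset, while for amount >= 1024 the constant is loaded
   first, e.g. on Thumb-2

	mov	r7, #amount
	add	r7, sp, r7

   and on Thumb-1 the final add is written add r7, r7, sp, matching the
   operand-order note in the comment above.  */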
21547
21548 struct scratch_reg {
21549 rtx reg;
21550 bool saved;
21551 };
21552
21553 /* Return a short-lived scratch register for use as a 2nd scratch register on
21554 function entry after the registers are saved in the prologue. This register
21555 must be released by means of release_scratch_register_on_entry. IP is not
21556 considered since it is always used as the 1st scratch register if available.
21557
21558 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21559 mask of live registers. */
21560
21561 static void
21562 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21563 unsigned long live_regs)
21564 {
21565 int regno = -1;
21566
21567 sr->saved = false;
21568
21569 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21570 regno = LR_REGNUM;
21571 else
21572 {
21573 unsigned int i;
21574
21575 for (i = 4; i < 11; i++)
21576 if (regno1 != i && (live_regs & (1 << i)) != 0)
21577 {
21578 regno = i;
21579 break;
21580 }
21581
21582 if (regno < 0)
21583 {
21584 /* If IP is used as the 1st scratch register for a nested function,
21585 then either r3 wasn't available or is used to preserve IP. */
21586 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21587 regno1 = 3;
21588 regno = (regno1 == 3 ? 2 : 3);
21589 sr->saved
21590 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21591 regno);
21592 }
21593 }
21594
21595 sr->reg = gen_rtx_REG (SImode, regno);
21596 if (sr->saved)
21597 {
21598 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21599 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21600 rtx x = gen_rtx_SET (stack_pointer_rtx,
21601 plus_constant (Pmode, stack_pointer_rtx, -4));
21602 RTX_FRAME_RELATED_P (insn) = 1;
21603 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21604 }
21605 }
21606
21607 /* Release a scratch register obtained from the preceding function. */
21608
21609 static void
21610 release_scratch_register_on_entry (struct scratch_reg *sr)
21611 {
21612 if (sr->saved)
21613 {
21614 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21615 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21616 rtx x = gen_rtx_SET (stack_pointer_rtx,
21617 plus_constant (Pmode, stack_pointer_rtx, 4));
21618 RTX_FRAME_RELATED_P (insn) = 1;
21619 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21620 }
21621 }
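
/* The save/restore pair above amounts to a push and a pop of a single word.
   As a sketch, assuming r4 was chosen as the scratch register:

	str	r4, [sp, #-4]!
	...
	ldr	r4, [sp], #4

   with the REG_FRAME_RELATED_EXPR notes describing the matching 4-byte SP
   adjustments to the unwinder.  */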
21622
21623 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21624
21625 #if PROBE_INTERVAL > 4096
21626 #error Cannot use indexed addressing mode for stack probing
21627 #endif
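/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12 this probes once per
   4096-byte page; the #error above guards the assumption, relied on below,
   that offsets smaller than the interval can be encoded directly in an
   indexed str.  */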
21628
21629 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21630 inclusive. These are offsets from the current stack pointer. REGNO1
21631 is the index number of the 1st scratch register and LIVE_REGS is the
21632 mask of live registers. */
21633
21634 static void
21635 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21636 unsigned int regno1, unsigned long live_regs)
21637 {
21638 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21639
21640 /* See if we have a constant small number of probes to generate. If so,
21641 that's the easy case. */
21642 if (size <= PROBE_INTERVAL)
21643 {
21644 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21645 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21646 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21647 }
21648
21649 /* The run-time loop is made up of 10 insns in the generic case while the
21650 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21651 else if (size <= 5 * PROBE_INTERVAL)
21652 {
21653 HOST_WIDE_INT i, rem;
21654
21655 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21656 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21657 emit_stack_probe (reg1);
21658
21659 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21660 it exceeds SIZE. If only two probes are needed, this will not
21661 generate any code. Then probe at FIRST + SIZE. */
21662 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21663 {
21664 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21665 emit_stack_probe (reg1);
21666 }
21667
21668 rem = size - (i - PROBE_INTERVAL);
21669 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21670 {
21671 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21672 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21673 }
21674 else
21675 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21676 }
21677
21678 /* Otherwise, do the same as above, but in a loop. Note that we must be
21679 extra careful with variables wrapping around because we might be at
21680 the very top (or the very bottom) of the address space and we have
21681 to be able to handle this case properly; in particular, we use an
21682 equality test for the loop condition. */
21683 else
21684 {
21685 HOST_WIDE_INT rounded_size;
21686 struct scratch_reg sr;
21687
21688 get_scratch_register_on_entry (&sr, regno1, live_regs);
21689
21690 emit_move_insn (reg1, GEN_INT (first));
21691
21692
21693 /* Step 1: round SIZE to the previous multiple of the interval. */
21694
21695 rounded_size = size & -PROBE_INTERVAL;
21696 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21697
21698
21699 /* Step 2: compute initial and final value of the loop counter. */
21700
21701 /* TEST_ADDR = SP + FIRST. */
21702 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21703
21704 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21705 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21706
21707
21708 /* Step 3: the loop
21709
21710 do
21711 {
21712 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21713 probe at TEST_ADDR
21714 }
21715 while (TEST_ADDR != LAST_ADDR)
21716
21717 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21718 until it is equal to ROUNDED_SIZE. */
21719
21720 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21721
21722
21723 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21724 that SIZE is equal to ROUNDED_SIZE. */
21725
21726 if (size != rounded_size)
21727 {
21728 HOST_WIDE_INT rem = size - rounded_size;
21729
21730 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21731 {
21732 emit_set_insn (sr.reg,
21733 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21734 emit_stack_probe (plus_constant (Pmode, sr.reg,
21735 PROBE_INTERVAL - rem));
21736 }
21737 else
21738 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21739 }
21740
21741 release_scratch_register_on_entry (&sr);
21742 }
21743
21744 /* Make sure nothing is scheduled before we are done. */
21745 emit_insn (gen_blockage ());
21746 }
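
/* Worked example of the unrolled path above, assuming the usual 4 KiB
   PROBE_INTERVAL, FIRST = 4096 and SIZE = 9000: reg1 is set to SP - 8192
   and probed, the loop adds one more probe at SP - 12288, and the residual
   808 bytes are covered by a final probe at SP - 13096, i.e. FIRST + SIZE
   below the incoming SP.  */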
21747
21748 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21749 absolute addresses. */
21750
21751 const char *
21752 output_probe_stack_range (rtx reg1, rtx reg2)
21753 {
21754 static int labelno = 0;
21755 char loop_lab[32];
21756 rtx xops[2];
21757
21758 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21759
21760 /* Loop. */
21761 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21762
21763 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21764 xops[0] = reg1;
21765 xops[1] = GEN_INT (PROBE_INTERVAL);
21766 output_asm_insn ("sub\t%0, %0, %1", xops);
21767
21768 /* Probe at TEST_ADDR. */
21769 output_asm_insn ("str\tr0, [%0, #0]", xops);
21770
21771 /* Test if TEST_ADDR == LAST_ADDR. */
21772 xops[1] = reg2;
21773 output_asm_insn ("cmp\t%0, %1", xops);
21774
21775 /* Branch. */
21776 fputs ("\tbne\t", asm_out_file);
21777 assemble_name_raw (asm_out_file, loop_lab);
21778 fputc ('\n', asm_out_file);
21779
21780 return "";
21781 }
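
/* The assembly produced by the function above looks like the following
   sketch (label number and register choice are illustrative; the interval
   shown is the usual 4 KiB):

	.LPSRL0:
		sub	ip, ip, #4096
		str	r0, [ip, #0]
		cmp	ip, r4
		bne	.LPSRL0
 */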
21782
21783 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21784 function. */
21785 void
21786 arm_expand_prologue (void)
21787 {
21788 rtx amount;
21789 rtx insn;
21790 rtx ip_rtx;
21791 unsigned long live_regs_mask;
21792 unsigned long func_type;
21793 int fp_offset = 0;
21794 int saved_pretend_args = 0;
21795 int saved_regs = 0;
21796 unsigned HOST_WIDE_INT args_to_push;
21797 HOST_WIDE_INT size;
21798 arm_stack_offsets *offsets;
21799 bool clobber_ip;
21800
21801 func_type = arm_current_func_type ();
21802
21803 /* Naked functions don't have prologues. */
21804 if (IS_NAKED (func_type))
21805 {
21806 if (flag_stack_usage_info)
21807 current_function_static_stack_size = 0;
21808 return;
21809 }
21810
21811 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21812 args_to_push = crtl->args.pretend_args_size;
21813
21814 /* Compute which registers we will have to save onto the stack. */
21815 offsets = arm_get_frame_offsets ();
21816 live_regs_mask = offsets->saved_regs_mask;
21817
21818 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21819
21820 if (IS_STACKALIGN (func_type))
21821 {
21822 rtx r0, r1;
21823
21824 /* Handle a word-aligned stack pointer. We generate the following:
21825
21826 mov r0, sp
21827 bic r1, r0, #7
21828 mov sp, r1
21829 <save and restore r0 in normal prologue/epilogue>
21830 mov sp, r0
21831 bx lr
21832
21833 The unwinder doesn't need to know about the stack realignment.
21834 Just tell it we saved SP in r0. */
21835 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21836
21837 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21838 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21839
21840 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21841 RTX_FRAME_RELATED_P (insn) = 1;
21842 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21843
21844 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21845
21846 /* ??? The CFA changes here, which may cause GDB to conclude that it
21847 has entered a different function. That said, the unwind info is
21848 correct, individually, before and after this instruction because
21849 we've described the save of SP, which will override the default
21850 handling of SP as restoring from the CFA. */
21851 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21852 }
21853
21854 /* Let's compute the static_chain_stack_bytes required and store it. Right
21855 now the value must be -1 as stored by arm_init_machine_status (). */
21856 cfun->machine->static_chain_stack_bytes
21857 = arm_compute_static_chain_stack_bytes ();
21858
21859 /* The static chain register is the same as the IP register. If it is
21860 clobbered when creating the frame, we need to save and restore it. */
21861 clobber_ip = IS_NESTED (func_type)
21862 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21863 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21864 || flag_stack_clash_protection)
21865 && !df_regs_ever_live_p (LR_REGNUM)
21866 && arm_r3_live_at_start_p ()));
21867
21868 /* Find somewhere to store IP whilst the frame is being created.
21869 We try the following places in order:
21870
21871 1. The last argument register r3 if it is available.
21872 2. A slot on the stack above the frame if there are no
21873 arguments to push onto the stack.
21874 3. Register r3 again, after pushing the argument registers
21875 onto the stack, if this is a varargs function.
21876 4. The last slot on the stack created for the arguments to
21877 push, if this isn't a varargs function.
21878
21879 Note - we only need to tell the dwarf2 backend about the SP
21880 adjustment in the second variant; the static chain register
21881 doesn't need to be unwound, as it doesn't contain a value
21882 inherited from the caller. */
21883 if (clobber_ip)
21884 {
21885 if (!arm_r3_live_at_start_p ())
21886 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21887 else if (args_to_push == 0)
21888 {
21889 rtx addr, dwarf;
21890
21891 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21892 saved_regs += 4;
21893
21894 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21895 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21896 fp_offset = 4;
21897
21898 /* Just tell the dwarf backend that we adjusted SP. */
21899 dwarf = gen_rtx_SET (stack_pointer_rtx,
21900 plus_constant (Pmode, stack_pointer_rtx,
21901 -fp_offset));
21902 RTX_FRAME_RELATED_P (insn) = 1;
21903 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21904 }
21905 else
21906 {
21907 /* Store the args on the stack. */
21908 if (cfun->machine->uses_anonymous_args)
21909 {
21910 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21911 (0xf0 >> (args_to_push / 4)) & 0xf);
21912 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21913 saved_pretend_args = 1;
21914 }
21915 else
21916 {
21917 rtx addr, dwarf;
21918
21919 if (args_to_push == 4)
21920 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21921 else
21922 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21923 plus_constant (Pmode,
21924 stack_pointer_rtx,
21925 -args_to_push));
21926
21927 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21928
21929 /* Just tell the dwarf backend that we adjusted SP. */
21930 dwarf = gen_rtx_SET (stack_pointer_rtx,
21931 plus_constant (Pmode, stack_pointer_rtx,
21932 -args_to_push));
21933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21934 }
21935
21936 RTX_FRAME_RELATED_P (insn) = 1;
21937 fp_offset = args_to_push;
21938 args_to_push = 0;
21939 }
21940 }
21941
21942 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21943 {
21944 if (IS_INTERRUPT (func_type))
21945 {
21946 /* Interrupt functions must not corrupt any registers.
21947 Creating a frame pointer however, corrupts the IP
21948 register, so we must push it first. */
21949 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21950
21951 /* Do not set RTX_FRAME_RELATED_P on this insn.
21952 The dwarf stack unwinding code only wants to see one
21953 stack decrement per function, and this is not it. If
21954 this instruction is labeled as being part of the frame
21955 creation sequence then dwarf2out_frame_debug_expr will
21956 die when it encounters the assignment of IP to FP
21957 later on, since the use of SP here establishes SP as
21958 the CFA register and not IP.
21959
21960 Anyway this instruction is not really part of the stack
21961 frame creation although it is part of the prologue. */
21962 }
21963
21964 insn = emit_set_insn (ip_rtx,
21965 plus_constant (Pmode, stack_pointer_rtx,
21966 fp_offset));
21967 RTX_FRAME_RELATED_P (insn) = 1;
21968 }
21969
21970 if (args_to_push)
21971 {
21972 /* Push the argument registers, or reserve space for them. */
21973 if (cfun->machine->uses_anonymous_args)
21974 insn = emit_multi_reg_push
21975 ((0xf0 >> (args_to_push / 4)) & 0xf,
21976 (0xf0 >> (args_to_push / 4)) & 0xf);
21977 else
21978 insn = emit_insn
21979 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21980 GEN_INT (- args_to_push)));
21981 RTX_FRAME_RELATED_P (insn) = 1;
21982 }
21983
21984 /* If this is an interrupt service routine, and the link register
21985 is going to be pushed, and we're not generating the extra
21986 push of IP (needed when a frame is needed and the frame layout is APCS),
21987 subtracting four from LR now means that the function return
21988 can be done with a single instruction. */
21989 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21990 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21991 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21992 && TARGET_ARM)
21993 {
21994 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21995
21996 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21997 }
21998
21999 if (live_regs_mask)
22000 {
22001 unsigned long dwarf_regs_mask = live_regs_mask;
22002
22003 saved_regs += bit_count (live_regs_mask) * 4;
22004 if (optimize_size && !frame_pointer_needed
22005 && saved_regs == offsets->saved_regs - offsets->saved_args)
22006 {
22007 /* If no coprocessor registers are being pushed and we don't have
22008 to worry about a frame pointer then push extra registers to
22009 create the stack frame. This is done in a way that does not
22010 alter the frame layout, so is independent of the epilogue. */
22011 int n;
22012 int frame;
22013 n = 0;
22014 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
22015 n++;
22016 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
22017 if (frame && n * 4 >= frame)
22018 {
22019 n = frame / 4;
22020 live_regs_mask |= (1 << n) - 1;
22021 saved_regs += frame;
22022 }
22023 }
22024
22025 if (TARGET_LDRD
22026 && current_tune->prefer_ldrd_strd
22027 && !optimize_function_for_size_p (cfun))
22028 {
22029 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
22030 if (TARGET_THUMB2)
22031 thumb2_emit_strd_push (live_regs_mask);
22032 else if (TARGET_ARM
22033 && !TARGET_APCS_FRAME
22034 && !IS_INTERRUPT (func_type))
22035 arm_emit_strd_push (live_regs_mask);
22036 else
22037 {
22038 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22039 RTX_FRAME_RELATED_P (insn) = 1;
22040 }
22041 }
22042 else
22043 {
22044 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22045 RTX_FRAME_RELATED_P (insn) = 1;
22046 }
22047 }
22048
22049 if (! IS_VOLATILE (func_type))
22050 saved_regs += arm_save_coproc_regs ();
22051
22052 if (frame_pointer_needed && TARGET_ARM)
22053 {
22054 /* Create the new frame pointer. */
22055 if (TARGET_APCS_FRAME)
22056 {
22057 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22058 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22059 RTX_FRAME_RELATED_P (insn) = 1;
22060 }
22061 else
22062 {
22063 insn = GEN_INT (saved_regs - (4 + fp_offset));
22064 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22065 stack_pointer_rtx, insn));
22066 RTX_FRAME_RELATED_P (insn) = 1;
22067 }
22068 }
22069
22070 size = offsets->outgoing_args - offsets->saved_args;
22071 if (flag_stack_usage_info)
22072 current_function_static_stack_size = size;
22073
22074 /* If this isn't an interrupt service routine and we have a frame, then do
22075 stack checking. We use IP as the first scratch register, except for
22076 non-APCS nested functions where LR or r3 is available (see clobber_ip). */
22077 if (!IS_INTERRUPT (func_type)
22078 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22079 || flag_stack_clash_protection))
22080 {
22081 unsigned int regno;
22082
22083 if (!IS_NESTED (func_type) || clobber_ip)
22084 regno = IP_REGNUM;
22085 else if (df_regs_ever_live_p (LR_REGNUM))
22086 regno = LR_REGNUM;
22087 else
22088 regno = 3;
22089
22090 if (crtl->is_leaf && !cfun->calls_alloca)
22091 {
22092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22093 arm_emit_probe_stack_range (get_stack_check_protect (),
22094 size - get_stack_check_protect (),
22095 regno, live_regs_mask);
22096 }
22097 else if (size > 0)
22098 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22099 regno, live_regs_mask);
22100 }
22101
22102 /* Recover the static chain register. */
22103 if (clobber_ip)
22104 {
22105 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22106 insn = gen_rtx_REG (SImode, 3);
22107 else
22108 {
22109 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22110 insn = gen_frame_mem (SImode, insn);
22111 }
22112 emit_set_insn (ip_rtx, insn);
22113 emit_insn (gen_force_register_use (ip_rtx));
22114 }
22115
22116 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22117 {
22118 /* This add can produce multiple insns for a large constant, so we
22119 need to get tricky. */
22120 rtx_insn *last = get_last_insn ();
22121
22122 amount = GEN_INT (offsets->saved_args + saved_regs
22123 - offsets->outgoing_args);
22124
22125 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22126 amount));
22127 do
22128 {
22129 last = last ? NEXT_INSN (last) : get_insns ();
22130 RTX_FRAME_RELATED_P (last) = 1;
22131 }
22132 while (last != insn);
22133
22134 /* If the frame pointer is needed, emit a special barrier that
22135 will prevent the scheduler from moving stores to the frame
22136 before the stack adjustment. */
22137 if (frame_pointer_needed)
22138 emit_insn (gen_stack_tie (stack_pointer_rtx,
22139 hard_frame_pointer_rtx));
22140 }
22141
22142
22143 if (frame_pointer_needed && TARGET_THUMB2)
22144 thumb_set_frame_pointer (offsets);
22145
22146 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22147 {
22148 unsigned long mask;
22149
22150 mask = live_regs_mask;
22151 mask &= THUMB2_WORK_REGS;
22152 if (!IS_NESTED (func_type))
22153 mask |= (1 << IP_REGNUM);
22154 arm_load_pic_register (mask, NULL_RTX);
22155 }
22156
22157 /* If we are profiling, make sure no instructions are scheduled before
22158 the call to mcount. Similarly if the user has requested no
22159 scheduling in the prologue. Similarly if we want non-call exceptions
22160 using the EABI unwinder, to prevent faulting instructions from being
22161 swapped with a stack adjustment. */
22162 if (crtl->profile || !TARGET_SCHED_PROLOG
22163 || (arm_except_unwind_info (&global_options) == UI_TARGET
22164 && cfun->can_throw_non_call_exceptions))
22165 emit_insn (gen_blockage ());
22166
22167 /* If the link register is being kept alive, with the return address in it,
22168 then make sure that it does not get reused by the ce2 pass. */
22169 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22170 cfun->machine->lr_save_eliminated = 1;
22171 }
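
/* For reference, a sketch of the classic APCS frame set up by the code above
   for a simple ARM-state function (the actual register list and offsets
   depend on what is live and on the frame layout):

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<frame bytes>

   Non-APCS and Thumb-2 frames instead derive the frame pointer from SP
   after the pushes, as in the corresponding paths above.  */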
22172 \f
22173 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22174 static void
22175 arm_print_condition (FILE *stream)
22176 {
22177 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22178 {
22179 /* Branch conversion is not implemented for Thumb-2. */
22180 if (TARGET_THUMB)
22181 {
22182 output_operand_lossage ("predicated Thumb instruction");
22183 return;
22184 }
22185 if (current_insn_predicate != NULL)
22186 {
22187 output_operand_lossage
22188 ("predicated instruction in conditional sequence");
22189 return;
22190 }
22191
22192 fputs (arm_condition_codes[arm_current_cc], stream);
22193 }
22194 else if (current_insn_predicate)
22195 {
22196 enum arm_cond_code code;
22197
22198 if (TARGET_THUMB1)
22199 {
22200 output_operand_lossage ("predicated Thumb instruction");
22201 return;
22202 }
22203
22204 code = get_arm_condition_code (current_insn_predicate);
22205 fputs (arm_condition_codes[code], stream);
22206 }
22207 }
22208
22209
22210 /* Globally reserved letters: acln
22211 Punctuation letters currently used: @_|?().!#
22212 Lower case letters currently used: bcdefhimpqtvwxyz
22213 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22214 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22215
22216 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22217
22218 If CODE is 'd', then the X is a condition operand and the instruction
22219 should only be executed if the condition is true.
22220 If CODE is 'D', then the X is a condition operand and the instruction
22221 should only be executed if the condition is false: however, if the mode
22222 of the comparison is CCFPEmode, then always execute the instruction -- we
22223 do this because in these circumstances !GE does not necessarily imply LT;
22224 in these cases the instruction pattern will take care to make sure that
22225 an instruction containing %d will follow, thereby undoing the effects of
22226 doing this instruction unconditionally.
22227 If CODE is 'N' then X is a floating point operand that must be negated
22228 before output.
22229 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22230 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22231 static void
22232 arm_print_operand (FILE *stream, rtx x, int code)
22233 {
22234 switch (code)
22235 {
22236 case '@':
22237 fputs (ASM_COMMENT_START, stream);
22238 return;
22239
22240 case '_':
22241 fputs (user_label_prefix, stream);
22242 return;
22243
22244 case '|':
22245 fputs (REGISTER_PREFIX, stream);
22246 return;
22247
22248 case '?':
22249 arm_print_condition (stream);
22250 return;
22251
22252 case '.':
22253 /* The current condition code for a condition code setting instruction.
22254 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22255 fputc('s', stream);
22256 arm_print_condition (stream);
22257 return;
22258
22259 case '!':
22260 /* If the instruction is conditionally executed then print
22261 the current condition code, otherwise print 's'. */
22262 gcc_assert (TARGET_THUMB2);
22263 if (current_insn_predicate)
22264 arm_print_condition (stream);
22265 else
22266 fputc('s', stream);
22267 break;
22268
22269 /* %# is a "break" sequence. It doesn't output anything, but is used to
22270 separate e.g. operand numbers from following text, if that text consists
22271 of further digits which we don't want to be part of the operand
22272 number. */
22273 case '#':
22274 return;
22275
22276 case 'N':
22277 {
22278 REAL_VALUE_TYPE r;
22279 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22280 fprintf (stream, "%s", fp_const_from_val (&r));
22281 }
22282 return;
22283
22284 /* An integer or symbol address without a preceding # sign. */
22285 case 'c':
22286 switch (GET_CODE (x))
22287 {
22288 case CONST_INT:
22289 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22290 break;
22291
22292 case SYMBOL_REF:
22293 output_addr_const (stream, x);
22294 break;
22295
22296 case CONST:
22297 if (GET_CODE (XEXP (x, 0)) == PLUS
22298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22299 {
22300 output_addr_const (stream, x);
22301 break;
22302 }
22303 /* Fall through. */
22304
22305 default:
22306 output_operand_lossage ("Unsupported operand for code '%c'", code);
22307 }
22308 return;
22309
22310 /* An integer that we want to print in HEX. */
22311 case 'x':
22312 switch (GET_CODE (x))
22313 {
22314 case CONST_INT:
22315 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22316 break;
22317
22318 default:
22319 output_operand_lossage ("Unsupported operand for code '%c'", code);
22320 }
22321 return;
22322
22323 case 'B':
22324 if (CONST_INT_P (x))
22325 {
22326 HOST_WIDE_INT val;
22327 val = ARM_SIGN_EXTEND (~INTVAL (x));
22328 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22329 }
22330 else
22331 {
22332 putc ('~', stream);
22333 output_addr_const (stream, x);
22334 }
22335 return;
22336
22337 case 'b':
22338 /* Print the log2 of a CONST_INT. */
22339 {
22340 HOST_WIDE_INT val;
22341
22342 if (!CONST_INT_P (x)
22343 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22344 output_operand_lossage ("Unsupported operand for code '%c'", code);
22345 else
22346 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22347 }
22348 return;
22349
22350 case 'L':
22351 /* The low 16 bits of an immediate constant. */
22352 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22353 return;
22354
22355 case 'i':
22356 fprintf (stream, "%s", arithmetic_instr (x, 1));
22357 return;
22358
22359 case 'I':
22360 fprintf (stream, "%s", arithmetic_instr (x, 0));
22361 return;
22362
22363 case 'S':
22364 {
22365 HOST_WIDE_INT val;
22366 const char *shift;
22367
22368 shift = shift_op (x, &val);
22369
22370 if (shift)
22371 {
22372 fprintf (stream, ", %s ", shift);
22373 if (val == -1)
22374 arm_print_operand (stream, XEXP (x, 1), 0);
22375 else
22376 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22377 }
22378 }
22379 return;
22380
22381 /* An explanation of the 'Q', 'R' and 'H' register operands:
22382
22383 In a pair of registers containing a DI or DF value the 'Q'
22384 operand returns the register number of the register containing
22385 the least significant part of the value. The 'R' operand returns
22386 the register number of the register containing the most
22387 significant part of the value.
22388
22389 The 'H' operand returns the higher of the two register numbers.
22390 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22391 same as the 'Q' operand, since the most significant part of the
22392 value is held in the lower number register. The reverse is true
22393 on systems where WORDS_BIG_ENDIAN is false.
22394
22395 The purpose of these operands is to distinguish between cases
22396 where the endian-ness of the values is important (for example
22397 when they are added together), and cases where the endian-ness
22398 is irrelevant, but the order of register operations is important.
22399 For example when loading a value from memory into a register
22400 pair, the endian-ness does not matter. Provided that the value
22401 from the lower memory address is put into the lower numbered
22402 register, and the value from the higher address is put into the
22403 higher numbered register, the load will work regardless of whether
22404 the value being loaded is big-wordian or little-wordian. The
22405 order of the two register loads can matter however, if the address
22406 of the memory location is actually held in one of the registers
22407 being overwritten by the load.
22408
22409 The 'Q' and 'R' constraints are also available for 64-bit
22410 constants. */
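    /* Example: for a DImode value held in {r0, r1} on a little-endian
       target, %Q prints r0 (the least significant word), %R prints r1
       (the most significant word) and %H prints r1 (the higher-numbered
       register); on a WORDS_BIG_ENDIAN target %Q and %H both print r1
       while %R prints r0.  */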
22411 case 'Q':
22412 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22413 {
22414 rtx part = gen_lowpart (SImode, x);
22415 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22416 return;
22417 }
22418
22419 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22420 {
22421 output_operand_lossage ("invalid operand for code '%c'", code);
22422 return;
22423 }
22424
22425 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22426 return;
22427
22428 case 'R':
22429 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22430 {
22431 machine_mode mode = GET_MODE (x);
22432 rtx part;
22433
22434 if (mode == VOIDmode)
22435 mode = DImode;
22436 part = gen_highpart_mode (SImode, mode, x);
22437 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22438 return;
22439 }
22440
22441 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22442 {
22443 output_operand_lossage ("invalid operand for code '%c'", code);
22444 return;
22445 }
22446
22447 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22448 return;
22449
22450 case 'H':
22451 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22452 {
22453 output_operand_lossage ("invalid operand for code '%c'", code);
22454 return;
22455 }
22456
22457 asm_fprintf (stream, "%r", REGNO (x) + 1);
22458 return;
22459
22460 case 'J':
22461 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22462 {
22463 output_operand_lossage ("invalid operand for code '%c'", code);
22464 return;
22465 }
22466
22467 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22468 return;
22469
22470 case 'K':
22471 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22472 {
22473 output_operand_lossage ("invalid operand for code '%c'", code);
22474 return;
22475 }
22476
22477 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22478 return;
22479
22480 case 'm':
22481 asm_fprintf (stream, "%r",
22482 REG_P (XEXP (x, 0))
22483 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22484 return;
22485
22486 case 'M':
22487 asm_fprintf (stream, "{%r-%r}",
22488 REGNO (x),
22489 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22490 return;
22491
22492 /* Like 'M', but writing doubleword vector registers, for use by Neon
22493 insns. */
22494 case 'h':
22495 {
22496 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22497 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22498 if (numregs == 1)
22499 asm_fprintf (stream, "{d%d}", regno);
22500 else
22501 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22502 }
22503 return;
22504
22505 case 'd':
22506 /* CONST_TRUE_RTX means always -- that's the default. */
22507 if (x == const_true_rtx)
22508 return;
22509
22510 if (!COMPARISON_P (x))
22511 {
22512 output_operand_lossage ("invalid operand for code '%c'", code);
22513 return;
22514 }
22515
22516 fputs (arm_condition_codes[get_arm_condition_code (x)],
22517 stream);
22518 return;
22519
22520 case 'D':
22521 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22522 want to do that. */
22523 if (x == const_true_rtx)
22524 {
22525 output_operand_lossage ("instruction never executed");
22526 return;
22527 }
22528 if (!COMPARISON_P (x))
22529 {
22530 output_operand_lossage ("invalid operand for code '%c'", code);
22531 return;
22532 }
22533
22534 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22535 (get_arm_condition_code (x))],
22536 stream);
22537 return;
22538
22539 case 's':
22540 case 'V':
22541 case 'W':
22542 case 'X':
22543 case 'Y':
22544 case 'Z':
22545 /* Former Maverick support, removed after GCC-4.7. */
22546 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22547 return;
22548
22549 case 'U':
22550 if (!REG_P (x)
22551 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22552 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22553 /* Bad value for wCG register number. */
22554 {
22555 output_operand_lossage ("invalid operand for code '%c'", code);
22556 return;
22557 }
22558
22559 else
22560 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22561 return;
22562
22563 /* Print an iWMMXt control register name. */
22564 case 'w':
22565 if (!CONST_INT_P (x)
22566 || INTVAL (x) < 0
22567 || INTVAL (x) >= 16)
22568 /* Bad value for wC register number. */
22569 {
22570 output_operand_lossage ("invalid operand for code '%c'", code);
22571 return;
22572 }
22573
22574 else
22575 {
22576 static const char * wc_reg_names [16] =
22577 {
22578 "wCID", "wCon", "wCSSF", "wCASF",
22579 "wC4", "wC5", "wC6", "wC7",
22580 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22581 "wC12", "wC13", "wC14", "wC15"
22582 };
22583
22584 fputs (wc_reg_names [INTVAL (x)], stream);
22585 }
22586 return;
22587
22588 /* Print the high single-precision register of a VFP double-precision
22589 register. */
22590 case 'p':
22591 {
22592 machine_mode mode = GET_MODE (x);
22593 int regno;
22594
22595 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22596 {
22597 output_operand_lossage ("invalid operand for code '%c'", code);
22598 return;
22599 }
22600
22601 regno = REGNO (x);
22602 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22603 {
22604 output_operand_lossage ("invalid operand for code '%c'", code);
22605 return;
22606 }
22607
22608 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22609 }
22610 return;
22611
22612 /* Print a VFP/Neon double precision or quad precision register name. */
22613 case 'P':
22614 case 'q':
22615 {
22616 machine_mode mode = GET_MODE (x);
22617 int is_quad = (code == 'q');
22618 int regno;
22619
22620 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22621 {
22622 output_operand_lossage ("invalid operand for code '%c'", code);
22623 return;
22624 }
22625
22626 if (!REG_P (x)
22627 || !IS_VFP_REGNUM (REGNO (x)))
22628 {
22629 output_operand_lossage ("invalid operand for code '%c'", code);
22630 return;
22631 }
22632
22633 regno = REGNO (x);
22634 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22635 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22636 {
22637 output_operand_lossage ("invalid operand for code '%c'", code);
22638 return;
22639 }
22640
22641 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22642 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22643 }
22644 return;
22645
22646 /* These two codes print the low/high doubleword register of a Neon quad
22647 register, respectively. For pair-structure types, can also print
22648 low/high quadword registers. */
22649 case 'e':
22650 case 'f':
22651 {
22652 machine_mode mode = GET_MODE (x);
22653 int regno;
22654
22655 if ((GET_MODE_SIZE (mode) != 16
22656 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22657 {
22658 output_operand_lossage ("invalid operand for code '%c'", code);
22659 return;
22660 }
22661
22662 regno = REGNO (x);
22663 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22664 {
22665 output_operand_lossage ("invalid operand for code '%c'", code);
22666 return;
22667 }
22668
22669 if (GET_MODE_SIZE (mode) == 16)
22670 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22671 + (code == 'f' ? 1 : 0));
22672 else
22673 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22674 + (code == 'f' ? 1 : 0));
22675 }
22676 return;
22677
22678 /* Print a VFPv3 floating-point constant, represented as an integer
22679 index. */
22680 case 'G':
22681 {
22682 int index = vfp3_const_double_index (x);
22683 gcc_assert (index != -1);
22684 fprintf (stream, "%d", index);
22685 }
22686 return;
22687
22688 /* Print bits representing opcode features for Neon.
22689
22690 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22691 and polynomials as unsigned.
22692
22693 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22694
22695 Bit 2 is 1 for rounding functions, 0 otherwise. */
22696
22697 /* Identify the type as 's', 'u', 'p' or 'f'. */
22698 case 'T':
22699 {
22700 HOST_WIDE_INT bits = INTVAL (x);
22701 fputc ("uspf"[bits & 3], stream);
22702 }
22703 return;
22704
22705 /* Likewise, but signed and unsigned integers are both 'i'. */
22706 case 'F':
22707 {
22708 HOST_WIDE_INT bits = INTVAL (x);
22709 fputc ("iipf"[bits & 3], stream);
22710 }
22711 return;
22712
22713 /* As for 'T', but emit 'u' instead of 'p'. */
22714 case 't':
22715 {
22716 HOST_WIDE_INT bits = INTVAL (x);
22717 fputc ("usuf"[bits & 3], stream);
22718 }
22719 return;
22720
22721 /* Bit 2: rounding (vs none). */
22722 case 'O':
22723 {
22724 HOST_WIDE_INT bits = INTVAL (x);
22725 fputs ((bits & 4) != 0 ? "r" : "", stream);
22726 }
22727 return;
22728
22729 /* Memory operand for vld1/vst1 instruction. */
22730 case 'A':
22731 {
22732 rtx addr;
22733 bool postinc = false;
22734 rtx postinc_reg = NULL;
22735 unsigned align, memsize, align_bits;
22736
22737 gcc_assert (MEM_P (x));
22738 addr = XEXP (x, 0);
22739 if (GET_CODE (addr) == POST_INC)
22740 {
22741 postinc = true;
22742 addr = XEXP (addr, 0);
22743 }
22744 if (GET_CODE (addr) == POST_MODIFY)
22745 {
22746 postinc_reg = XEXP (XEXP (addr, 1), 1);
22747 addr = XEXP (addr, 0);
22748 }
22749 asm_fprintf (stream, "[%r", REGNO (addr));
22750
22751 /* We know the alignment of this access, so we can emit a hint in the
22752 instruction (for some alignments) as an aid to the memory subsystem
22753 of the target. */
22754 align = MEM_ALIGN (x) >> 3;
22755 memsize = MEM_SIZE (x);
22756
22757 /* Only certain alignment specifiers are supported by the hardware. */
22758 if (memsize == 32 && (align % 32) == 0)
22759 align_bits = 256;
22760 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22761 align_bits = 128;
22762 else if (memsize >= 8 && (align % 8) == 0)
22763 align_bits = 64;
22764 else
22765 align_bits = 0;
22766
22767 if (align_bits != 0)
22768 asm_fprintf (stream, ":%d", align_bits);
22769
22770 asm_fprintf (stream, "]");
22771
22772 if (postinc)
22773 fputs("!", stream);
22774 if (postinc_reg)
22775 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22776 }
22777 return;
22778
22779 case 'C':
22780 {
22781 rtx addr;
22782
22783 gcc_assert (MEM_P (x));
22784 addr = XEXP (x, 0);
22785 gcc_assert (REG_P (addr));
22786 asm_fprintf (stream, "[%r]", REGNO (addr));
22787 }
22788 return;
22789
22790 /* Translate an S register number into a D register number and element index. */
22791 case 'y':
22792 {
22793 machine_mode mode = GET_MODE (x);
22794 int regno;
22795
22796 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22797 {
22798 output_operand_lossage ("invalid operand for code '%c'", code);
22799 return;
22800 }
22801
22802 regno = REGNO (x);
22803 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22804 {
22805 output_operand_lossage ("invalid operand for code '%c'", code);
22806 return;
22807 }
22808
22809 regno = regno - FIRST_VFP_REGNUM;
22810 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22811 }
22812 return;
22813
22814 case 'v':
22815 gcc_assert (CONST_DOUBLE_P (x));
22816 int result;
22817 result = vfp3_const_double_for_fract_bits (x);
22818 if (result == 0)
22819 result = vfp3_const_double_for_bits (x);
22820 fprintf (stream, "#%d", result);
22821 return;
22822
22823 /* Register specifier for vld1.16/vst1.16. Translate the S register
22824 number into a D register number and element index. */
22825 case 'z':
22826 {
22827 machine_mode mode = GET_MODE (x);
22828 int regno;
22829
22830 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22831 {
22832 output_operand_lossage ("invalid operand for code '%c'", code);
22833 return;
22834 }
22835
22836 regno = REGNO (x);
22837 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22838 {
22839 output_operand_lossage ("invalid operand for code '%c'", code);
22840 return;
22841 }
22842
22843 regno = regno - FIRST_VFP_REGNUM;
22844 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22845 }
22846 return;
22847
22848 default:
22849 if (x == 0)
22850 {
22851 output_operand_lossage ("missing operand");
22852 return;
22853 }
22854
22855 switch (GET_CODE (x))
22856 {
22857 case REG:
22858 asm_fprintf (stream, "%r", REGNO (x));
22859 break;
22860
22861 case MEM:
22862 output_address (GET_MODE (x), XEXP (x, 0));
22863 break;
22864
22865 case CONST_DOUBLE:
22866 {
22867 char fpstr[20];
22868 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22869 sizeof (fpstr), 0, 1);
22870 fprintf (stream, "#%s", fpstr);
22871 }
22872 break;
22873
22874 default:
22875 gcc_assert (GET_CODE (x) != NEG);
22876 fputc ('#', stream);
22877 if (GET_CODE (x) == HIGH)
22878 {
22879 fputs (":lower16:", stream);
22880 x = XEXP (x, 0);
22881 }
22882
22883 output_addr_const (stream, x);
22884 break;
22885 }
22886 }
22887 }
22888 \f
22889 /* Target hook for printing a memory address. */
22890 static void
22891 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22892 {
22893 if (TARGET_32BIT)
22894 {
22895 int is_minus = GET_CODE (x) == MINUS;
22896
22897 if (REG_P (x))
22898 asm_fprintf (stream, "[%r]", REGNO (x));
22899 else if (GET_CODE (x) == PLUS || is_minus)
22900 {
22901 rtx base = XEXP (x, 0);
22902 rtx index = XEXP (x, 1);
22903 HOST_WIDE_INT offset = 0;
22904 if (!REG_P (base)
22905 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22906 {
22907 /* Ensure that BASE is a register. */
22908 /* (one of them must be). */
22909 /* Also ensure that SP is not used as an index register. */
22910 std::swap (base, index);
22911 }
22912 switch (GET_CODE (index))
22913 {
22914 case CONST_INT:
22915 offset = INTVAL (index);
22916 if (is_minus)
22917 offset = -offset;
22918 asm_fprintf (stream, "[%r, #%wd]",
22919 REGNO (base), offset);
22920 break;
22921
22922 case REG:
22923 asm_fprintf (stream, "[%r, %s%r]",
22924 REGNO (base), is_minus ? "-" : "",
22925 REGNO (index));
22926 break;
22927
22928 case MULT:
22929 case ASHIFTRT:
22930 case LSHIFTRT:
22931 case ASHIFT:
22932 case ROTATERT:
22933 {
22934 asm_fprintf (stream, "[%r, %s%r",
22935 REGNO (base), is_minus ? "-" : "",
22936 REGNO (XEXP (index, 0)));
22937 arm_print_operand (stream, index, 'S');
22938 fputs ("]", stream);
22939 break;
22940 }
22941
22942 default:
22943 gcc_unreachable ();
22944 }
22945 }
22946 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22947 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22948 {
22949 gcc_assert (REG_P (XEXP (x, 0)));
22950
22951 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22952 asm_fprintf (stream, "[%r, #%s%d]!",
22953 REGNO (XEXP (x, 0)),
22954 GET_CODE (x) == PRE_DEC ? "-" : "",
22955 GET_MODE_SIZE (mode));
22956 else
22957 asm_fprintf (stream, "[%r], #%s%d",
22958 REGNO (XEXP (x, 0)),
22959 GET_CODE (x) == POST_DEC ? "-" : "",
22960 GET_MODE_SIZE (mode));
22961 }
22962 else if (GET_CODE (x) == PRE_MODIFY)
22963 {
22964 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22965 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22966 asm_fprintf (stream, "#%wd]!",
22967 INTVAL (XEXP (XEXP (x, 1), 1)));
22968 else
22969 asm_fprintf (stream, "%r]!",
22970 REGNO (XEXP (XEXP (x, 1), 1)));
22971 }
22972 else if (GET_CODE (x) == POST_MODIFY)
22973 {
22974 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22975 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22976 asm_fprintf (stream, "#%wd",
22977 INTVAL (XEXP (XEXP (x, 1), 1)));
22978 else
22979 asm_fprintf (stream, "%r",
22980 REGNO (XEXP (XEXP (x, 1), 1)));
22981 }
22982 else output_addr_const (stream, x);
22983 }
22984 else
22985 {
22986 if (REG_P (x))
22987 asm_fprintf (stream, "[%r]", REGNO (x));
22988 else if (GET_CODE (x) == POST_INC)
22989 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22990 else if (GET_CODE (x) == PLUS)
22991 {
22992 gcc_assert (REG_P (XEXP (x, 0)));
22993 if (CONST_INT_P (XEXP (x, 1)))
22994 asm_fprintf (stream, "[%r, #%wd]",
22995 REGNO (XEXP (x, 0)),
22996 INTVAL (XEXP (x, 1)));
22997 else
22998 asm_fprintf (stream, "[%r, %r]",
22999 REGNO (XEXP (x, 0)),
23000 REGNO (XEXP (x, 1)));
23001 }
23002 else
23003 output_addr_const (stream, x);
23004 }
23005 }
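
/* Examples of the syntax produced above (sketch, SImode accesses based on
   r3): a PRE_DEC address prints as "[r3, #-4]!", a POST_INC address as
   "[r3], #4", and a register-plus-register address as "[r3, r2]".  */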
23006 \f
23007 /* Target hook for indicating whether a punctuation character for
23008 TARGET_PRINT_OPERAND is valid. */
23009 static bool
23010 arm_print_operand_punct_valid_p (unsigned char code)
23011 {
23012 return (code == '@' || code == '|' || code == '.'
23013 || code == '(' || code == ')' || code == '#'
23014 || (TARGET_32BIT && (code == '?'))
23015 || (TARGET_THUMB2 && (code == '!'))
23016 || (TARGET_THUMB && (code == '_')));
23017 }
23018 \f
23019 /* Target hook for assembling integer objects. The ARM version needs to
23020 handle word-sized values specially. */
23021 static bool
23022 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23023 {
23024 machine_mode mode;
23025
23026 if (size == UNITS_PER_WORD && aligned_p)
23027 {
23028 fputs ("\t.word\t", asm_out_file);
23029 output_addr_const (asm_out_file, x);
23030
23031 /* Mark symbols as position independent. We only do this in the
23032 .text segment, not in the .data segment. */
23033 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23034 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23035 {
23036 /* See legitimize_pic_address for an explanation of the
23037 TARGET_VXWORKS_RTP check. */
23038 /* References to weak symbols cannot be resolved locally:
23039 they may be overridden by a non-weak definition at link
23040 time. */
23041 if (!arm_pic_data_is_text_relative
23042 || (GET_CODE (x) == SYMBOL_REF
23043 && (!SYMBOL_REF_LOCAL_P (x)
23044 || (SYMBOL_REF_DECL (x)
23045 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
23046 fputs ("(GOT)", asm_out_file);
23047 else
23048 fputs ("(GOTOFF)", asm_out_file);
23049 }
23050 fputc ('\n', asm_out_file);
23051 return true;
23052 }
23053
23054 mode = GET_MODE (x);
23055
23056 if (arm_vector_mode_supported_p (mode))
23057 {
23058 int i, units;
23059
23060 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23061
23062 units = CONST_VECTOR_NUNITS (x);
23063 size = GET_MODE_UNIT_SIZE (mode);
23064
23065 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23066 for (i = 0; i < units; i++)
23067 {
23068 rtx elt = CONST_VECTOR_ELT (x, i);
23069 assemble_integer
23070 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23071 }
23072 else
23073 for (i = 0; i < units; i++)
23074 {
23075 rtx elt = CONST_VECTOR_ELT (x, i);
23076 assemble_real
23077 (*CONST_DOUBLE_REAL_VALUE (elt),
23078 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23079 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23080 }
23081
23082 return true;
23083 }
23084
23085 return default_assemble_integer (x, size, aligned_p);
23086 }
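
/* Sketch of the word-sized PIC output above: a symbol in the constant table
   that binds locally is emitted as

	.word	sym(GOTOFF)

   while a weak or otherwise preemptible symbol (or non-text-relative PIC
   data) gets

	.word	sym(GOT)

   as selected by the test in the code above.  */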
23087
23088 static void
23089 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23090 {
23091 section *s;
23092
23093 if (!TARGET_AAPCS_BASED)
23094 {
23095 (is_ctor ?
23096 default_named_section_asm_out_constructor
23097 : default_named_section_asm_out_destructor) (symbol, priority);
23098 return;
23099 }
23100
23101 /* Put these in the .init_array section, using a special relocation. */
23102 if (priority != DEFAULT_INIT_PRIORITY)
23103 {
23104 char buf[18];
23105 sprintf (buf, "%s.%.5u",
23106 is_ctor ? ".init_array" : ".fini_array",
23107 priority);
23108 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23109 }
23110 else if (is_ctor)
23111 s = ctors_section;
23112 else
23113 s = dtors_section;
23114
23115 switch_to_section (s);
23116 assemble_align (POINTER_SIZE);
23117 fputs ("\t.word\t", asm_out_file);
23118 output_addr_const (asm_out_file, symbol);
23119 fputs ("(target1)\n", asm_out_file);
23120 }
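
/* For AAPCS targets the function above emits, e.g. for a constructor with
   priority 65, something along the lines of (directives and symbol name
   illustrative):

	.section	.init_array.00065
	.align	2
	.word	ctor_fn(target1)

   The %.5u format zero-pads the priority so that the sections sort
   numerically by name.  */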
23121
23122 /* Add a function to the list of static constructors. */
23123
23124 static void
23125 arm_elf_asm_constructor (rtx symbol, int priority)
23126 {
23127 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23128 }
23129
23130 /* Add a function to the list of static destructors. */
23131
23132 static void
23133 arm_elf_asm_destructor (rtx symbol, int priority)
23134 {
23135 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23136 }
23137 \f
23138 /* A finite state machine takes care of noticing whether or not instructions
23139 can be conditionally executed, and thus decrease execution time and code
23140 size by deleting branch instructions. The fsm is controlled by
23141 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23142
23143 /* The states of the fsm controlling condition codes are:
23144 0: normal, do nothing special
23145 1: make ASM_OUTPUT_OPCODE not output this instruction
23146 2: make ASM_OUTPUT_OPCODE not output this instruction
23147 3: make instructions conditional
23148 4: make instructions conditional
23149
23150 State transitions (state->state by whom under condition):
23151 0 -> 1 final_prescan_insn if the `target' is a label
23152 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23153 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23154 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23155 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23156 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23157 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23158 (the target insn is arm_target_insn).
23159
23160 If the jump clobbers the conditions then we use states 2 and 4.
23161
23162 A similar thing can be done with conditional return insns.
23163
23164 XXX In case the `target' is an unconditional branch, this conditionalising
23165 of the instructions always reduces code size, but not always execution
23166 time. But then, I want to reduce the code size to somewhere near what
23167 /bin/cc produces. */
23168
23169 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23170 instructions. When a COND_EXEC instruction is seen the subsequent
23171 instructions are scanned so that multiple conditional instructions can be
23172 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23173 specify the length and true/false mask for the IT block. These will be
23174 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23175
23176 /* Returns the index of the ARM condition code string in
23177 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23178 COMPARISON should be an rtx like `(eq (...) (...))'. */
23179
23180 enum arm_cond_code
23181 maybe_get_arm_condition_code (rtx comparison)
23182 {
23183 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23184 enum arm_cond_code code;
23185 enum rtx_code comp_code = GET_CODE (comparison);
23186
23187 if (GET_MODE_CLASS (mode) != MODE_CC)
23188 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23189 XEXP (comparison, 1));
23190
23191 switch (mode)
23192 {
23193 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23194 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23195 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23196 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23197 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23198 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23199 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23200 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23201 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23202 case E_CC_DLTUmode: code = ARM_CC;
23203
23204 dominance:
23205 if (comp_code == EQ)
23206 return ARM_INVERSE_CONDITION_CODE (code);
23207 if (comp_code == NE)
23208 return code;
23209 return ARM_NV;
23210
23211 case E_CC_NOOVmode:
23212 switch (comp_code)
23213 {
23214 case NE: return ARM_NE;
23215 case EQ: return ARM_EQ;
23216 case GE: return ARM_PL;
23217 case LT: return ARM_MI;
23218 default: return ARM_NV;
23219 }
23220
23221 case E_CC_Zmode:
23222 switch (comp_code)
23223 {
23224 case NE: return ARM_NE;
23225 case EQ: return ARM_EQ;
23226 default: return ARM_NV;
23227 }
23228
23229 case E_CC_Nmode:
23230 switch (comp_code)
23231 {
23232 case NE: return ARM_MI;
23233 case EQ: return ARM_PL;
23234 default: return ARM_NV;
23235 }
23236
23237 case E_CCFPEmode:
23238 case E_CCFPmode:
23239 /* We can handle all cases except UNEQ and LTGT. */
23240 switch (comp_code)
23241 {
23242 case GE: return ARM_GE;
23243 case GT: return ARM_GT;
23244 case LE: return ARM_LS;
23245 case LT: return ARM_MI;
23246 case NE: return ARM_NE;
23247 case EQ: return ARM_EQ;
23248 case ORDERED: return ARM_VC;
23249 case UNORDERED: return ARM_VS;
23250 case UNLT: return ARM_LT;
23251 case UNLE: return ARM_LE;
23252 case UNGT: return ARM_HI;
23253 case UNGE: return ARM_PL;
23254 /* UNEQ and LTGT do not have a representation. */
23255 case UNEQ: /* Fall through. */
23256 case LTGT: /* Fall through. */
23257 default: return ARM_NV;
23258 }
23259
23260 case E_CC_SWPmode:
23261 switch (comp_code)
23262 {
23263 case NE: return ARM_NE;
23264 case EQ: return ARM_EQ;
23265 case GE: return ARM_LE;
23266 case GT: return ARM_LT;
23267 case LE: return ARM_GE;
23268 case LT: return ARM_GT;
23269 case GEU: return ARM_LS;
23270 case GTU: return ARM_CC;
23271 case LEU: return ARM_CS;
23272 case LTU: return ARM_HI;
23273 default: return ARM_NV;
23274 }
23275
23276 case E_CC_Cmode:
23277 switch (comp_code)
23278 {
23279 case LTU: return ARM_CS;
23280 case GEU: return ARM_CC;
23281 case NE: return ARM_CS;
23282 case EQ: return ARM_CC;
23283 default: return ARM_NV;
23284 }
23285
23286 case E_CC_CZmode:
23287 switch (comp_code)
23288 {
23289 case NE: return ARM_NE;
23290 case EQ: return ARM_EQ;
23291 case GEU: return ARM_CS;
23292 case GTU: return ARM_HI;
23293 case LEU: return ARM_LS;
23294 case LTU: return ARM_CC;
23295 default: return ARM_NV;
23296 }
23297
23298 case E_CC_NCVmode:
23299 switch (comp_code)
23300 {
23301 case GE: return ARM_GE;
23302 case LT: return ARM_LT;
23303 case GEU: return ARM_CS;
23304 case LTU: return ARM_CC;
23305 default: return ARM_NV;
23306 }
23307
23308 case E_CC_Vmode:
23309 switch (comp_code)
23310 {
23311 case NE: return ARM_VS;
23312 case EQ: return ARM_VC;
23313 default: return ARM_NV;
23314 }
23315
23316 case E_CCmode:
23317 switch (comp_code)
23318 {
23319 case NE: return ARM_NE;
23320 case EQ: return ARM_EQ;
23321 case GE: return ARM_GE;
23322 case GT: return ARM_GT;
23323 case LE: return ARM_LE;
23324 case LT: return ARM_LT;
23325 case GEU: return ARM_CS;
23326 case GTU: return ARM_HI;
23327 case LEU: return ARM_LS;
23328 case LTU: return ARM_CC;
23329 default: return ARM_NV;
23330 }
23331
23332 default: gcc_unreachable ();
23333 }
23334 }
23335
23336 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23337 static enum arm_cond_code
23338 get_arm_condition_code (rtx comparison)
23339 {
23340 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23341 gcc_assert (code != ARM_NV);
23342 return code;
23343 }
23344
23345 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23346 code registers when not targeting Thumb1. The VFP condition register
23347 only exists when generating hard-float code. */
23348 static bool
23349 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23350 {
23351 if (!TARGET_32BIT)
23352 return false;
23353
23354 *p1 = CC_REGNUM;
23355 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23356 return true;
23357 }
23358
23359 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23360 instructions. */
23361 void
23362 thumb2_final_prescan_insn (rtx_insn *insn)
23363 {
23364 rtx_insn *first_insn = insn;
23365 rtx body = PATTERN (insn);
23366 rtx predicate;
23367 enum arm_cond_code code;
23368 int n;
23369 int mask;
23370 int max;
23371
23372 /* max_insns_skipped in the tune was already taken into account in the
23373 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23374 just emit the IT blocks as best we can. It does not make sense to split
23375 the IT blocks. */
23376 max = MAX_INSN_PER_IT_BLOCK;
23377
23378 /* Remove the previous insn from the count of insns to be output. */
23379 if (arm_condexec_count)
23380 arm_condexec_count--;
23381
23382 /* Nothing to do if we are already inside a conditional block. */
23383 if (arm_condexec_count)
23384 return;
23385
23386 if (GET_CODE (body) != COND_EXEC)
23387 return;
23388
23389 /* Conditional jumps are implemented directly. */
23390 if (JUMP_P (insn))
23391 return;
23392
23393 predicate = COND_EXEC_TEST (body);
23394 arm_current_cc = get_arm_condition_code (predicate);
23395
23396 n = get_attr_ce_count (insn);
23397 arm_condexec_count = 1;
23398 arm_condexec_mask = (1 << n) - 1;
23399 arm_condexec_masklen = n;
23400 /* See if subsequent instructions can be combined into the same block. */
23401 for (;;)
23402 {
23403 insn = next_nonnote_insn (insn);
23404
23405 /* Jumping into the middle of an IT block is illegal, so a label or
23406 barrier terminates the block. */
23407 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23408 break;
23409
23410 body = PATTERN (insn);
23411 /* USE and CLOBBER aren't really insns, so just skip them. */
23412 if (GET_CODE (body) == USE
23413 || GET_CODE (body) == CLOBBER)
23414 continue;
23415
23416 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23417 if (GET_CODE (body) != COND_EXEC)
23418 break;
23419 /* Maximum number of conditionally executed instructions in a block. */
23420 n = get_attr_ce_count (insn);
23421 if (arm_condexec_masklen + n > max)
23422 break;
23423
23424 predicate = COND_EXEC_TEST (body);
23425 code = get_arm_condition_code (predicate);
23426 mask = (1 << n) - 1;
23427 if (arm_current_cc == code)
23428 arm_condexec_mask |= (mask << arm_condexec_masklen);
23429 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23430 break;
23431
23432 arm_condexec_count++;
23433 arm_condexec_masklen += n;
23434
23435 /* A jump must be the last instruction in a conditional block. */
23436 if (JUMP_P (insn))
23437 break;
23438 }
23439 /* Restore recog_data (getting the attributes of other insns can
23440 destroy this array, but final.c assumes that it remains intact
23441 across this call). */
23442 extract_constrain_insn_cached (first_insn);
23443 }
23444
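/* Added note (no comment in the original here): roughly, this drives the
   conditional-execution state machine used at final-output time.  When a
   conditional branch (or return) skips only a few insns that are safe to
   conditionalise, those insns are marked for conditional execution via
   arm_current_cc and the branch itself can be dropped.  arm_ccfsm_state
   records progress: state 3 means the trick may be repeated across an
   adjacent unconditional branch, and state 4 means we are waiting to reach
   arm_target_insn before returning to state 0.  */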
23445 void
23446 arm_final_prescan_insn (rtx_insn *insn)
23447 {
23448 /* BODY will hold the body of INSN. */
23449 rtx body = PATTERN (insn);
23450
23451 /* This will be 1 if trying to repeat the trick, and things need to be
23452 reversed if it appears to fail. */
23453 int reverse = 0;
23454
23455 /* If we start with a return insn, we only succeed if we find another one. */
23456 int seeking_return = 0;
23457 enum rtx_code return_code = UNKNOWN;
23458
23459 /* START_INSN will hold the insn from where we start looking. This is the
23460 first insn after the following code_label if REVERSE is true. */
23461 rtx_insn *start_insn = insn;
23462
23463 /* If in state 4, check if the branch target has been reached, in order to
23464 change back to state 0. */
23465 if (arm_ccfsm_state == 4)
23466 {
23467 if (insn == arm_target_insn)
23468 {
23469 arm_target_insn = NULL;
23470 arm_ccfsm_state = 0;
23471 }
23472 return;
23473 }
23474
23475 /* If in state 3, it is possible to repeat the trick, if this insn is an
23476 unconditional branch to a label, and immediately following this branch
23477 is the previous target label which is only used once, and the label this
23478 branch jumps to is not too far off. */
23479 if (arm_ccfsm_state == 3)
23480 {
23481 if (simplejump_p (insn))
23482 {
23483 start_insn = next_nonnote_insn (start_insn);
23484 if (BARRIER_P (start_insn))
23485 {
23486 /* XXX Isn't this always a barrier? */
23487 start_insn = next_nonnote_insn (start_insn);
23488 }
23489 if (LABEL_P (start_insn)
23490 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23491 && LABEL_NUSES (start_insn) == 1)
23492 reverse = TRUE;
23493 else
23494 return;
23495 }
23496 else if (ANY_RETURN_P (body))
23497 {
23498 start_insn = next_nonnote_insn (start_insn);
23499 if (BARRIER_P (start_insn))
23500 start_insn = next_nonnote_insn (start_insn);
23501 if (LABEL_P (start_insn)
23502 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23503 && LABEL_NUSES (start_insn) == 1)
23504 {
23505 reverse = TRUE;
23506 seeking_return = 1;
23507 return_code = GET_CODE (body);
23508 }
23509 else
23510 return;
23511 }
23512 else
23513 return;
23514 }
23515
23516 gcc_assert (!arm_ccfsm_state || reverse);
23517 if (!JUMP_P (insn))
23518 return;
23519
23520 /* This jump might be paralleled with a clobber of the condition codes;
23521 the jump should always come first.  */
23522 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23523 body = XVECEXP (body, 0, 0);
23524
23525 if (reverse
23526 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23527 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23528 {
23529 int insns_skipped;
23530 int fail = FALSE, succeed = FALSE;
23531 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23532 int then_not_else = TRUE;
23533 rtx_insn *this_insn = start_insn;
23534 rtx label = 0;
23535
23536 /* Register the insn jumped to. */
23537 if (reverse)
23538 {
23539 if (!seeking_return)
23540 label = XEXP (SET_SRC (body), 0);
23541 }
23542 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23543 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23544 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23545 {
23546 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23547 then_not_else = FALSE;
23548 }
23549 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23550 {
23551 seeking_return = 1;
23552 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23553 }
23554 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23555 {
23556 seeking_return = 1;
23557 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23558 then_not_else = FALSE;
23559 }
23560 else
23561 gcc_unreachable ();
23562
23563 /* See how many insns this branch skips, and what kind of insns. If all
23564 insns are okay, and the label or unconditional branch to the same
23565 label is not too far away, succeed. */
23566 for (insns_skipped = 0;
23567 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23568 {
23569 rtx scanbody;
23570
23571 this_insn = next_nonnote_insn (this_insn);
23572 if (!this_insn)
23573 break;
23574
23575 switch (GET_CODE (this_insn))
23576 {
23577 case CODE_LABEL:
23578 /* Succeed if it is the target label, otherwise fail since
23579 control falls in from somewhere else. */
23580 if (this_insn == label)
23581 {
23582 arm_ccfsm_state = 1;
23583 succeed = TRUE;
23584 }
23585 else
23586 fail = TRUE;
23587 break;
23588
23589 case BARRIER:
23590 /* Succeed if the following insn is the target label.
23591 Otherwise fail.
23592 If return insns are used then the last insn in a function
23593 will be a barrier. */
23594 this_insn = next_nonnote_insn (this_insn);
23595 if (this_insn && this_insn == label)
23596 {
23597 arm_ccfsm_state = 1;
23598 succeed = TRUE;
23599 }
23600 else
23601 fail = TRUE;
23602 break;
23603
23604 case CALL_INSN:
23605 /* The AAPCS says that conditional calls should not be
23606 used since they make interworking inefficient (the
23607 linker can't transform BL<cond> into BLX). That's
23608 only a problem if the machine has BLX. */
23609 if (arm_arch5t)
23610 {
23611 fail = TRUE;
23612 break;
23613 }
23614
23615 /* Succeed if the following insn is the target label, or
23616 if the following two insns are a barrier and the
23617 target label. */
23618 this_insn = next_nonnote_insn (this_insn);
23619 if (this_insn && BARRIER_P (this_insn))
23620 this_insn = next_nonnote_insn (this_insn);
23621
23622 if (this_insn && this_insn == label
23623 && insns_skipped < max_insns_skipped)
23624 {
23625 arm_ccfsm_state = 1;
23626 succeed = TRUE;
23627 }
23628 else
23629 fail = TRUE;
23630 break;
23631
23632 case JUMP_INSN:
23633 /* If this is an unconditional branch to the same label, succeed.
23634 If it is to another label, do nothing. If it is conditional,
23635 fail. */
23636 /* XXX Probably, the tests for SET and the PC are
23637 unnecessary. */
23638
23639 scanbody = PATTERN (this_insn);
23640 if (GET_CODE (scanbody) == SET
23641 && GET_CODE (SET_DEST (scanbody)) == PC)
23642 {
23643 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23644 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23645 {
23646 arm_ccfsm_state = 2;
23647 succeed = TRUE;
23648 }
23649 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23650 fail = TRUE;
23651 }
23652 /* Fail if a conditional return is undesirable (e.g. on a
23653 StrongARM), but still allow this if optimizing for size. */
23654 else if (GET_CODE (scanbody) == return_code
23655 && !use_return_insn (TRUE, NULL)
23656 && !optimize_size)
23657 fail = TRUE;
23658 else if (GET_CODE (scanbody) == return_code)
23659 {
23660 arm_ccfsm_state = 2;
23661 succeed = TRUE;
23662 }
23663 else if (GET_CODE (scanbody) == PARALLEL)
23664 {
23665 switch (get_attr_conds (this_insn))
23666 {
23667 case CONDS_NOCOND:
23668 break;
23669 default:
23670 fail = TRUE;
23671 break;
23672 }
23673 }
23674 else
23675 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23676
23677 break;
23678
23679 case INSN:
23680 /* Instructions using or affecting the condition codes make it
23681 fail. */
23682 scanbody = PATTERN (this_insn);
23683 if (!(GET_CODE (scanbody) == SET
23684 || GET_CODE (scanbody) == PARALLEL)
23685 || get_attr_conds (this_insn) != CONDS_NOCOND)
23686 fail = TRUE;
23687 break;
23688
23689 default:
23690 break;
23691 }
23692 }
23693 if (succeed)
23694 {
23695 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23696 arm_target_label = CODE_LABEL_NUMBER (label);
23697 else
23698 {
23699 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23700
23701 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23702 {
23703 this_insn = next_nonnote_insn (this_insn);
23704 gcc_assert (!this_insn
23705 || (!BARRIER_P (this_insn)
23706 && !LABEL_P (this_insn)));
23707 }
23708 if (!this_insn)
23709 {
23710 /* Oh dear!  We ran off the end; give up.  */
23711 extract_constrain_insn_cached (insn);
23712 arm_ccfsm_state = 0;
23713 arm_target_insn = NULL;
23714 return;
23715 }
23716 arm_target_insn = this_insn;
23717 }
23718
23719 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23720 what it was. */
23721 if (!reverse)
23722 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23723
23724 if (reverse || then_not_else)
23725 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23726 }
23727
23728 /* Restore recog_data (getting the attributes of other insns can
23729 destroy this array, but final.c assumes that it remains intact
23730 across this call). */
23731 extract_constrain_insn_cached (insn);
23732 }
23733 }
23734
23735 /* Output IT instructions. */
23736 void
23737 thumb2_asm_output_opcode (FILE * stream)
23738 {
23739 char buff[5];
23740 int n;
23741
23742 if (arm_condexec_mask)
23743 {
23744 for (n = 0; n < arm_condexec_masklen; n++)
23745 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23746 buff[n] = 0;
23747 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23748 arm_condition_codes[arm_current_cc]);
23749 arm_condexec_mask = 0;
23750 }
23751 }
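/* Illustrative example (added, not in the original sources): for an IT
   block of two insns that both execute on ARM_EQ, arm_condexec_mask is
   0b11 and arm_condexec_masklen is 2, so the function above emits
   "itt	eq" immediately before the first conditional instruction.  */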
23752
23753 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23754 UNITS_PER_WORD bytes wide. */
23755 static unsigned int
23756 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23757 {
23758 if (TARGET_32BIT
23759 && regno > PC_REGNUM
23760 && regno != FRAME_POINTER_REGNUM
23761 && regno != ARG_POINTER_REGNUM
23762 && !IS_VFP_REGNUM (regno))
23763 return 1;
23764
23765 return ARM_NUM_REGS (mode);
23766 }
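/* Illustrative example (added): a DImode value held in core registers
   occupies ARM_NUM_REGS (DImode) == 2 consecutive registers (e.g. r4/r5),
   while the special registers handled by the early return above always
   report a single register.  */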
23767
23768 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23769 static bool
23770 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23771 {
23772 if (GET_MODE_CLASS (mode) == MODE_CC)
23773 return (regno == CC_REGNUM
23774 || (TARGET_HARD_FLOAT
23775 && regno == VFPCC_REGNUM));
23776
23777 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23778 return false;
23779
23780 if (TARGET_THUMB1)
23781 /* For the Thumb we only allow values bigger than SImode in
23782 registers 0 - 6, so that there is always a second low
23783 register available to hold the upper part of the value.
23784 We probably ought to ensure that the register is the
23785 start of an even numbered register pair. */
23786 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23787
23788 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23789 {
23790 if (mode == SFmode || mode == SImode)
23791 return VFP_REGNO_OK_FOR_SINGLE (regno);
23792
23793 if (mode == DFmode)
23794 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23795
23796 if (mode == HFmode)
23797 return VFP_REGNO_OK_FOR_SINGLE (regno);
23798
23799 /* VFP registers can hold HImode values. */
23800 if (mode == HImode)
23801 return VFP_REGNO_OK_FOR_SINGLE (regno);
23802
23803 if (TARGET_NEON)
23804 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23805 || (VALID_NEON_QREG_MODE (mode)
23806 && NEON_REGNO_OK_FOR_QUAD (regno))
23807 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23808 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23809 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23810 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23811 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23812
23813 return false;
23814 }
23815
23816 if (TARGET_REALLY_IWMMXT)
23817 {
23818 if (IS_IWMMXT_GR_REGNUM (regno))
23819 return mode == SImode;
23820
23821 if (IS_IWMMXT_REGNUM (regno))
23822 return VALID_IWMMXT_REG_MODE (mode);
23823 }
23824
23825 /* We allow almost any value to be stored in the general registers.
23826 Restrict doubleword quantities to even register pairs in ARM state
23827 so that we can use ldrd. Do not allow very large Neon structure
23828 opaque modes in general registers; they would use too many. */
23829 if (regno <= LAST_ARM_REGNUM)
23830 {
23831 if (ARM_NUM_REGS (mode) > 4)
23832 return false;
23833
23834 if (TARGET_THUMB2)
23835 return true;
23836
23837 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23838 }
23839
23840 if (regno == FRAME_POINTER_REGNUM
23841 || regno == ARG_POINTER_REGNUM)
23842 /* We only allow integers in the fake hard registers. */
23843 return GET_MODE_CLASS (mode) == MODE_INT;
23844
23845 return false;
23846 }
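/* Illustrative example (added): with TARGET_LDRD in ARM state, a DImode
   value is accepted in r4 (an even-numbered register, so ldrd/strd can be
   used) but rejected in r5, per the final check on core registers above.  */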
23847
23848 /* Implement TARGET_MODES_TIEABLE_P. */
23849
23850 static bool
23851 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23852 {
23853 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23854 return true;
23855
23856 /* We specifically want to allow elements of "structure" modes to
23857 be tieable to the structure. This more general condition allows
23858 other rarer situations too. */
23859 if (TARGET_NEON
23860 && (VALID_NEON_DREG_MODE (mode1)
23861 || VALID_NEON_QREG_MODE (mode1)
23862 || VALID_NEON_STRUCT_MODE (mode1))
23863 && (VALID_NEON_DREG_MODE (mode2)
23864 || VALID_NEON_QREG_MODE (mode2)
23865 || VALID_NEON_STRUCT_MODE (mode2)))
23866 return true;
23867
23868 return false;
23869 }
23870
23871 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23872 not used in arm mode. */
23873
23874 enum reg_class
23875 arm_regno_class (int regno)
23876 {
23877 if (regno == PC_REGNUM)
23878 return NO_REGS;
23879
23880 if (TARGET_THUMB1)
23881 {
23882 if (regno == STACK_POINTER_REGNUM)
23883 return STACK_REG;
23884 if (regno == CC_REGNUM)
23885 return CC_REG;
23886 if (regno < 8)
23887 return LO_REGS;
23888 return HI_REGS;
23889 }
23890
23891 if (TARGET_THUMB2 && regno < 8)
23892 return LO_REGS;
23893
23894 if ( regno <= LAST_ARM_REGNUM
23895 || regno == FRAME_POINTER_REGNUM
23896 || regno == ARG_POINTER_REGNUM)
23897 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23898
23899 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23900 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23901
23902 if (IS_VFP_REGNUM (regno))
23903 {
23904 if (regno <= D7_VFP_REGNUM)
23905 return VFP_D0_D7_REGS;
23906 else if (regno <= LAST_LO_VFP_REGNUM)
23907 return VFP_LO_REGS;
23908 else
23909 return VFP_HI_REGS;
23910 }
23911
23912 if (IS_IWMMXT_REGNUM (regno))
23913 return IWMMXT_REGS;
23914
23915 if (IS_IWMMXT_GR_REGNUM (regno))
23916 return IWMMXT_GR_REGS;
23917
23918 return NO_REGS;
23919 }
23920
23921 /* Handle a special case when computing the offset
23922 of an argument from the frame pointer. */
23923 int
23924 arm_debugger_arg_offset (int value, rtx addr)
23925 {
23926 rtx_insn *insn;
23927
23928 /* We are only interested if dbxout_parms() failed to compute the offset. */
23929 if (value != 0)
23930 return 0;
23931
23932 /* We can only cope with the case where the address is held in a register. */
23933 if (!REG_P (addr))
23934 return 0;
23935
23936 /* If we are using the frame pointer to point at the argument, then
23937 an offset of 0 is correct. */
23938 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23939 return 0;
23940
23941 /* If we are using the stack pointer to point at the
23942 argument, then an offset of 0 is correct. */
23943 /* ??? Check this is consistent with thumb2 frame layout. */
23944 if ((TARGET_THUMB || !frame_pointer_needed)
23945 && REGNO (addr) == SP_REGNUM)
23946 return 0;
23947
23948 /* Oh dear. The argument is pointed to by a register rather
23949 than being held in a register, or being stored at a known
23950 offset from the frame pointer. Since GDB only understands
23951 those two kinds of argument we must translate the address
23952 held in the register into an offset from the frame pointer.
23953 We do this by searching through the insns for the function
23954 looking to see where this register gets its value. If the
23955 register is initialized from the frame pointer plus an offset
23956 then we are in luck and we can continue, otherwise we give up.
23957
23958 This code is exercised by producing debugging information
23959 for a function with arguments like this:
23960
23961 double func (double a, double b, int c, double d) {return d;}
23962
23963 Without this code the stab for parameter 'd' will be set to
23964 an offset of 0 from the frame pointer, rather than 8. */
23965
23966 /* The if() statement says:
23967
23968 If the insn is a normal instruction
23969 and if the insn is setting the value in a register
23970 and if the register being set is the register holding the address of the argument
23971 and if the address is computed by an addition
23972 that involves adding to a register
23973 which is the frame pointer
23974 a constant integer
23975
23976 then... */
23977
23978 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23979 {
23980 if ( NONJUMP_INSN_P (insn)
23981 && GET_CODE (PATTERN (insn)) == SET
23982 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23983 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23984 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23985 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23986 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23987 )
23988 {
23989 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23990
23991 break;
23992 }
23993 }
23994
23995 if (value == 0)
23996 {
23997 debug_rtx (addr);
23998 warning (0, "unable to compute real location of stacked parameter");
23999 value = 8; /* XXX magic hack */
24000 }
24001
24002 return value;
24003 }
24004 \f
24005 /* Implement TARGET_PROMOTED_TYPE. */
24006
24007 static tree
24008 arm_promoted_type (const_tree t)
24009 {
24010 if (SCALAR_FLOAT_TYPE_P (t)
24011 && TYPE_PRECISION (t) == 16
24012 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24013 return float_type_node;
24014 return NULL_TREE;
24015 }
24016
24017 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24018 This simply adds HFmode as a supported mode; even though we don't
24019 implement arithmetic on this type directly, it's supported by
24020 optabs conversions, much the way the double-word arithmetic is
24021 special-cased in the default hook. */
24022
24023 static bool
24024 arm_scalar_mode_supported_p (scalar_mode mode)
24025 {
24026 if (mode == HFmode)
24027 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24028 else if (ALL_FIXED_POINT_MODE_P (mode))
24029 return true;
24030 else
24031 return default_scalar_mode_supported_p (mode);
24032 }
24033
24034 /* Set the value of FLT_EVAL_METHOD.
24035 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24036
24037 0: evaluate all operations and constants, whose semantic type has at
24038 most the range and precision of type float, to the range and
24039 precision of float; evaluate all other operations and constants to
24040 the range and precision of the semantic type;
24041
24042 N, where _FloatN is a supported interchange floating type:
24043 evaluate all operations and constants, whose semantic type has at
24044 most the range and precision of _FloatN type, to the range and
24045 precision of the _FloatN type; evaluate all other operations and
24046 constants to the range and precision of the semantic type;
24047
24048 If we have the ARMv8.2-A extensions then we support _Float16 in native
24049 precision, so we should set this to 16. Otherwise, we support the type,
24050 but want to evaluate expressions in float precision, so set this to
24051 0. */
24052
24053 static enum flt_eval_method
24054 arm_excess_precision (enum excess_precision_type type)
24055 {
24056 switch (type)
24057 {
24058 case EXCESS_PRECISION_TYPE_FAST:
24059 case EXCESS_PRECISION_TYPE_STANDARD:
24060 /* We can calculate either in 16-bit range and precision or
24061 32-bit range and precision. Make that decision based on whether
24062 we have native support for the ARMv8.2-A 16-bit floating-point
24063 instructions or not. */
24064 return (TARGET_VFP_FP16INST
24065 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24066 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24067 case EXCESS_PRECISION_TYPE_IMPLICIT:
24068 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24069 default:
24070 gcc_unreachable ();
24071 }
24072 return FLT_EVAL_METHOD_UNPREDICTABLE;
24073 }
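/* Illustrative note (added): in practice this means that without the
   ARMv8.2-A FP16 instructions an expression such as a * b on two _Float16
   operands is evaluated in float and only the final result is narrowed
   back to _Float16, whereas with TARGET_VFP_FP16INST it is evaluated
   directly in 16-bit precision.  */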
24074
24075
24076 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24077 _Float16 if we are using anything other than ieee format for 16-bit
24078 floating point. Otherwise, punt to the default implementation. */
24079 static opt_scalar_float_mode
24080 arm_floatn_mode (int n, bool extended)
24081 {
24082 if (!extended && n == 16)
24083 {
24084 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24085 return HFmode;
24086 return opt_scalar_float_mode ();
24087 }
24088
24089 return default_floatn_mode (n, extended);
24090 }
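/* Illustrative note (added): consequently _Float16 is only available when
   the 16-bit format is IEEE half precision; with the alternative (ARM)
   half-precision format the type is rejected here, and the default
   handling is used only for the other _FloatN types.  */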
24091
24092
24093 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24094 not to early-clobber SRC registers in the process.
24095
24096 We assume that the operands described by SRC and DEST represent a
24097 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24098 number of components into which the copy has been decomposed. */
24099 void
24100 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24101 {
24102 unsigned int i;
24103
24104 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24105 || REGNO (operands[0]) < REGNO (operands[1]))
24106 {
24107 for (i = 0; i < count; i++)
24108 {
24109 operands[2 * i] = dest[i];
24110 operands[2 * i + 1] = src[i];
24111 }
24112 }
24113 else
24114 {
24115 for (i = 0; i < count; i++)
24116 {
24117 operands[2 * i] = dest[count - i - 1];
24118 operands[2 * i + 1] = src[count - i - 1];
24119 }
24120 }
24121 }
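/* Illustrative example (added): when copying a multi-register structure
   whose destination overlaps the source at a higher register number,
   emitting the component moves in reverse order (the else branch above)
   ensures no source register is overwritten before it has been read.  */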
24122
24123 /* Split operands into moves from op[1] + op[2] into op[0]. */
24124
24125 void
24126 neon_split_vcombine (rtx operands[3])
24127 {
24128 unsigned int dest = REGNO (operands[0]);
24129 unsigned int src1 = REGNO (operands[1]);
24130 unsigned int src2 = REGNO (operands[2]);
24131 machine_mode halfmode = GET_MODE (operands[1]);
24132 unsigned int halfregs = REG_NREGS (operands[1]);
24133 rtx destlo, desthi;
24134
24135 if (src1 == dest && src2 == dest + halfregs)
24136 {
24137 /* No-op move. Can't split to nothing; emit something. */
24138 emit_note (NOTE_INSN_DELETED);
24139 return;
24140 }
24141
24142 /* Preserve register attributes for variable tracking. */
24143 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24144 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24145 GET_MODE_SIZE (halfmode));
24146
24147 /* Special case of reversed high/low parts. Use VSWP. */
24148 if (src2 == dest && src1 == dest + halfregs)
24149 {
24150 rtx x = gen_rtx_SET (destlo, operands[1]);
24151 rtx y = gen_rtx_SET (desthi, operands[2]);
24152 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24153 return;
24154 }
24155
24156 if (!reg_overlap_mentioned_p (operands[2], destlo))
24157 {
24158 /* Try to avoid unnecessary moves if part of the result
24159 is in the right place already. */
24160 if (src1 != dest)
24161 emit_move_insn (destlo, operands[1]);
24162 if (src2 != dest + halfregs)
24163 emit_move_insn (desthi, operands[2]);
24164 }
24165 else
24166 {
24167 if (src2 != dest + halfregs)
24168 emit_move_insn (desthi, operands[2]);
24169 if (src1 != dest)
24170 emit_move_insn (destlo, operands[1]);
24171 }
24172 }
24173 \f
24174 /* Return the number (counting from 0) of
24175 the least significant set bit in MASK. */
24176
24177 inline static int
24178 number_of_first_bit_set (unsigned mask)
24179 {
24180 return ctz_hwi (mask);
24181 }
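/* Example (added): number_of_first_bit_set (0x30) == 4, i.e. in a register
   mask selecting {r4, r5} the first register found is r4.  */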
24182
24183 /* Like emit_multi_reg_push, but allowing for a different set of
24184 registers to be described as saved. MASK is the set of registers
24185 to be saved; REAL_REGS is the set of registers to be described as
24186 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24187
24188 static rtx_insn *
24189 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24190 {
24191 unsigned long regno;
24192 rtx par[10], tmp, reg;
24193 rtx_insn *insn;
24194 int i, j;
24195
24196 /* Build the parallel of the registers actually being stored. */
24197 for (i = 0; mask; ++i, mask &= mask - 1)
24198 {
24199 regno = ctz_hwi (mask);
24200 reg = gen_rtx_REG (SImode, regno);
24201
24202 if (i == 0)
24203 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24204 else
24205 tmp = gen_rtx_USE (VOIDmode, reg);
24206
24207 par[i] = tmp;
24208 }
24209
24210 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24211 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24212 tmp = gen_frame_mem (BLKmode, tmp);
24213 tmp = gen_rtx_SET (tmp, par[0]);
24214 par[0] = tmp;
24215
24216 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24217 insn = emit_insn (tmp);
24218
24219 /* Always build the stack adjustment note for unwind info. */
24220 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24221 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24222 par[0] = tmp;
24223
24224 /* Build the parallel of the registers recorded as saved for unwind. */
24225 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24226 {
24227 regno = ctz_hwi (real_regs);
24228 reg = gen_rtx_REG (SImode, regno);
24229
24230 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24231 tmp = gen_frame_mem (SImode, tmp);
24232 tmp = gen_rtx_SET (tmp, reg);
24233 RTX_FRAME_RELATED_P (tmp) = 1;
24234 par[j + 1] = tmp;
24235 }
24236
24237 if (j == 0)
24238 tmp = par[0];
24239 else
24240 {
24241 RTX_FRAME_RELATED_P (par[0]) = 1;
24242 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24243 }
24244
24245 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24246
24247 return insn;
24248 }
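/* Illustrative example (added, hypothetical values): the Thumb-1 prologue
   may move high registers into free low registers and push those, e.g.
   thumb1_emit_multi_reg_push (0x000f, 0x0f00) would push r0-r3 while the
   unwind information records r8-r11 as the registers actually saved.  */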
24249
24250 /* Emit code to push or pop registers to or from the stack. F is the
24251 assembly file. MASK is the registers to pop. */
24252 static void
24253 thumb_pop (FILE *f, unsigned long mask)
24254 {
24255 int regno;
24256 int lo_mask = mask & 0xFF;
24257
24258 gcc_assert (mask);
24259
24260 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24261 {
24262 /* Special case. Do not generate a POP PC statement here, do it in
24263 thumb_exit ().  */
24264 thumb_exit (f, -1);
24265 return;
24266 }
24267
24268 fprintf (f, "\tpop\t{");
24269
24270 /* Look at the low registers first. */
24271 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24272 {
24273 if (lo_mask & 1)
24274 {
24275 asm_fprintf (f, "%r", regno);
24276
24277 if ((lo_mask & ~1) != 0)
24278 fprintf (f, ", ");
24279 }
24280 }
24281
24282 if (mask & (1 << PC_REGNUM))
24283 {
24284 /* Catch popping the PC. */
24285 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24286 || IS_CMSE_ENTRY (arm_current_func_type ()))
24287 {
24288 /* The PC is never popped directly; instead
24289 it is popped into r3 and then BX is used. */
24290 fprintf (f, "}\n");
24291
24292 thumb_exit (f, -1);
24293
24294 return;
24295 }
24296 else
24297 {
24298 if (mask & 0xFF)
24299 fprintf (f, ", ");
24300
24301 asm_fprintf (f, "%r", PC_REGNUM);
24302 }
24303 }
24304
24305 fprintf (f, "}\n");
24306 }
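/* Illustrative example (added): thumb_pop (f, (1 << 4) | (1 << 5) | (1 << PC_REGNUM))
   prints "pop	{r4, r5, pc}" when no interworking return is required;
   otherwise the PC is diverted through thumb_exit as shown above.  */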
24307
24308 /* Generate code to return from a thumb function.
24309 If 'reg_containing_return_addr' is -1, then the return address is
24310 actually on the stack, at the stack pointer.
24311
24312 Note: do not forget to update length attribute of corresponding insn pattern
24313 when changing assembly output (eg. length attribute of epilogue_insns when
24314 updating Armv8-M Baseline Security Extensions register clearing
24315 sequences). */
24316 static void
24317 thumb_exit (FILE *f, int reg_containing_return_addr)
24318 {
24319 unsigned regs_available_for_popping;
24320 unsigned regs_to_pop;
24321 int pops_needed;
24322 unsigned available;
24323 unsigned required;
24324 machine_mode mode;
24325 int size;
24326 int restore_a4 = FALSE;
24327
24328 /* Compute the registers we need to pop. */
24329 regs_to_pop = 0;
24330 pops_needed = 0;
24331
24332 if (reg_containing_return_addr == -1)
24333 {
24334 regs_to_pop |= 1 << LR_REGNUM;
24335 ++pops_needed;
24336 }
24337
24338 if (TARGET_BACKTRACE)
24339 {
24340 /* Restore the (ARM) frame pointer and stack pointer. */
24341 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24342 pops_needed += 2;
24343 }
24344
24345 /* If there is nothing to pop then just emit the BX instruction and
24346 return. */
24347 if (pops_needed == 0)
24348 {
24349 if (crtl->calls_eh_return)
24350 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24351
24352 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24353 {
24354 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24355 reg_containing_return_addr);
24356 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24357 }
24358 else
24359 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24360 return;
24361 }
24362 /* Otherwise if we are not supporting interworking and we have not created
24363 a backtrace structure and the function was not entered in ARM mode then
24364 just pop the return address straight into the PC. */
24365 else if (!TARGET_INTERWORK
24366 && !TARGET_BACKTRACE
24367 && !is_called_in_ARM_mode (current_function_decl)
24368 && !crtl->calls_eh_return
24369 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24370 {
24371 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24372 return;
24373 }
24374
24375 /* Find out how many of the (return) argument registers we can corrupt. */
24376 regs_available_for_popping = 0;
24377
24378 /* If returning via __builtin_eh_return, the bottom three registers
24379 all contain information needed for the return. */
24380 if (crtl->calls_eh_return)
24381 size = 12;
24382 else
24383 {
24384 /* We try to deduce the registers used from the function's
24385 return value.  This is more reliable than examining
24386 df_regs_ever_live_p () because that will be set if the register is
24387 ever used in the function, not just if the register is used
24388 to hold a return value. */
24389
24390 if (crtl->return_rtx != 0)
24391 mode = GET_MODE (crtl->return_rtx);
24392 else
24393 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24394
24395 size = GET_MODE_SIZE (mode);
24396
24397 if (size == 0)
24398 {
24399 /* In a void function we can use any argument register.
24400 In a function that returns a structure on the stack
24401 we can use the second and third argument registers. */
24402 if (mode == VOIDmode)
24403 regs_available_for_popping =
24404 (1 << ARG_REGISTER (1))
24405 | (1 << ARG_REGISTER (2))
24406 | (1 << ARG_REGISTER (3));
24407 else
24408 regs_available_for_popping =
24409 (1 << ARG_REGISTER (2))
24410 | (1 << ARG_REGISTER (3));
24411 }
24412 else if (size <= 4)
24413 regs_available_for_popping =
24414 (1 << ARG_REGISTER (2))
24415 | (1 << ARG_REGISTER (3));
24416 else if (size <= 8)
24417 regs_available_for_popping =
24418 (1 << ARG_REGISTER (3));
24419 }
24420
24421 /* Match registers to be popped with registers into which we pop them. */
24422 for (available = regs_available_for_popping,
24423 required = regs_to_pop;
24424 required != 0 && available != 0;
24425 available &= ~(available & - available),
24426 required &= ~(required & - required))
24427 -- pops_needed;
24428
24429 /* If we have any popping registers left over, remove them. */
24430 if (available > 0)
24431 regs_available_for_popping &= ~available;
24432
24433 /* Otherwise if we need another popping register we can use
24434 the fourth argument register. */
24435 else if (pops_needed)
24436 {
24437 /* If we have not found any free argument registers and
24438 reg a4 contains the return address, we must move it. */
24439 if (regs_available_for_popping == 0
24440 && reg_containing_return_addr == LAST_ARG_REGNUM)
24441 {
24442 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24443 reg_containing_return_addr = LR_REGNUM;
24444 }
24445 else if (size > 12)
24446 {
24447 /* Register a4 is being used to hold part of the return value,
24448 but we have dire need of a free, low register. */
24449 restore_a4 = TRUE;
24450
24451 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24452 }
24453
24454 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24455 {
24456 /* The fourth argument register is available. */
24457 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24458
24459 --pops_needed;
24460 }
24461 }
24462
24463 /* Pop as many registers as we can. */
24464 thumb_pop (f, regs_available_for_popping);
24465
24466 /* Process the registers we popped. */
24467 if (reg_containing_return_addr == -1)
24468 {
24469 /* The return address was popped into the lowest numbered register. */
24470 regs_to_pop &= ~(1 << LR_REGNUM);
24471
24472 reg_containing_return_addr =
24473 number_of_first_bit_set (regs_available_for_popping);
24474
24475 /* Remove this register from the mask of available registers, so that
24476 the return address will not be corrupted by further pops. */
24477 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24478 }
24479
24480 /* If we popped other registers then handle them here. */
24481 if (regs_available_for_popping)
24482 {
24483 int frame_pointer;
24484
24485 /* Work out which register currently contains the frame pointer. */
24486 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24487
24488 /* Move it into the correct place. */
24489 asm_fprintf (f, "\tmov\t%r, %r\n",
24490 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24491
24492 /* (Temporarily) remove it from the mask of popped registers. */
24493 regs_available_for_popping &= ~(1 << frame_pointer);
24494 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24495
24496 if (regs_available_for_popping)
24497 {
24498 int stack_pointer;
24499
24500 /* We popped the stack pointer as well,
24501 find the register that contains it. */
24502 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24503
24504 /* Move it into the stack register. */
24505 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24506
24507 /* At this point we have popped all necessary registers, so
24508 do not worry about restoring regs_available_for_popping
24509 to its correct value:
24510
24511 assert (pops_needed == 0)
24512 assert (regs_available_for_popping == (1 << frame_pointer))
24513 assert (regs_to_pop == (1 << STACK_POINTER)) */
24514 }
24515 else
24516 {
24517 /* Since we have just moved the popped value into the frame
24518 pointer, the popping register is available for reuse, and
24519 we know that we still have the stack pointer left to pop. */
24520 regs_available_for_popping |= (1 << frame_pointer);
24521 }
24522 }
24523
24524 /* If we still have registers left on the stack, but we no longer have
24525 any registers into which we can pop them, then we must move the return
24526 address into the link register and make available the register that
24527 contained it. */
24528 if (regs_available_for_popping == 0 && pops_needed > 0)
24529 {
24530 regs_available_for_popping |= 1 << reg_containing_return_addr;
24531
24532 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24533 reg_containing_return_addr);
24534
24535 reg_containing_return_addr = LR_REGNUM;
24536 }
24537
24538 /* If we have registers left on the stack then pop some more.
24539 We know that at most we will want to pop FP and SP. */
24540 if (pops_needed > 0)
24541 {
24542 int popped_into;
24543 int move_to;
24544
24545 thumb_pop (f, regs_available_for_popping);
24546
24547 /* We have popped either FP or SP.
24548 Move whichever one it is into the correct register. */
24549 popped_into = number_of_first_bit_set (regs_available_for_popping);
24550 move_to = number_of_first_bit_set (regs_to_pop);
24551
24552 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24553 --pops_needed;
24554 }
24555
24556 /* If we still have not popped everything then we must have only
24557 had one register available to us and we are now popping the SP. */
24558 if (pops_needed > 0)
24559 {
24560 int popped_into;
24561
24562 thumb_pop (f, regs_available_for_popping);
24563
24564 popped_into = number_of_first_bit_set (regs_available_for_popping);
24565
24566 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24567 /*
24568 assert (regs_to_pop == (1 << STACK_POINTER))
24569 assert (pops_needed == 1)
24570 */
24571 }
24572
24573 /* If necessary restore the a4 register. */
24574 if (restore_a4)
24575 {
24576 if (reg_containing_return_addr != LR_REGNUM)
24577 {
24578 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24579 reg_containing_return_addr = LR_REGNUM;
24580 }
24581
24582 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24583 }
24584
24585 if (crtl->calls_eh_return)
24586 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24587
24588 /* Return to caller. */
24589 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24590 {
24591 /* This is for the cases where LR is not being used to contain the return
24592 address. It may therefore contain information that we might not want
24593 to leak, hence it must be cleared. The value in R0 will never be a
24594 secret at this point, so it is safe to use it, see the clearing code
24595 in 'cmse_nonsecure_entry_clear_before_return'. */
24596 if (reg_containing_return_addr != LR_REGNUM)
24597 asm_fprintf (f, "\tmov\tlr, r0\n");
24598
24599 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24600 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24601 }
24602 else
24603 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24604 }
24605 \f
24606 /* Scan INSN just before assembler is output for it.
24607 For Thumb-1, we track the status of the condition codes; this
24608 information is used in the cbranchsi4_insn pattern. */
24609 void
24610 thumb1_final_prescan_insn (rtx_insn *insn)
24611 {
24612 if (flag_print_asm_name)
24613 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24614 INSN_ADDRESSES (INSN_UID (insn)));
24615 /* Don't overwrite the previous setter when we get to a cbranch. */
24616 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24617 {
24618 enum attr_conds conds;
24619
24620 if (cfun->machine->thumb1_cc_insn)
24621 {
24622 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24623 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24624 CC_STATUS_INIT;
24625 }
24626 conds = get_attr_conds (insn);
24627 if (conds == CONDS_SET)
24628 {
24629 rtx set = single_set (insn);
24630 cfun->machine->thumb1_cc_insn = insn;
24631 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24632 cfun->machine->thumb1_cc_op1 = const0_rtx;
24633 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24634 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24635 {
24636 rtx src1 = XEXP (SET_SRC (set), 1);
24637 if (src1 == const0_rtx)
24638 cfun->machine->thumb1_cc_mode = CCmode;
24639 }
24640 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24641 {
24642 /* Record the src register operand instead of dest because
24643 cprop_hardreg pass propagates src. */
24644 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24645 }
24646 }
24647 else if (conds != CONDS_NOCOND)
24648 cfun->machine->thumb1_cc_insn = NULL_RTX;
24649 }
24650
24651 /* Check if unexpected far jump is used. */
24652 if (cfun->machine->lr_save_eliminated
24653 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24654 internal_error("Unexpected thumb1 far jump");
24655 }
24656
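/* Added note (no comment in the original here): return 1 if VAL is an
   8-bit constant shifted left by some amount, i.e. a constant that can
   typically be synthesised with an 8-bit move followed by a left shift;
   for example 0x00ff0000 qualifies, 0x00ff00ff does not.  */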
24657 int
24658 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24659 {
24660 unsigned HOST_WIDE_INT mask = 0xff;
24661 int i;
24662
24663 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24664 if (val == 0) /* XXX */
24665 return 0;
24666
24667 for (i = 0; i < 25; i++)
24668 if ((val & (mask << i)) == val)
24669 return 1;
24670
24671 return 0;
24672 }
24673
24674 /* Returns nonzero if the current function contains,
24675 or might contain a far jump. */
24676 static int
24677 thumb_far_jump_used_p (void)
24678 {
24679 rtx_insn *insn;
24680 bool far_jump = false;
24681 unsigned int func_size = 0;
24682
24683 /* If we have already decided that far jumps may be used,
24684 do not bother checking again, and always return true even if
24685 it turns out that they are not being used. Once we have made
24686 the decision that far jumps are present (and that hence the link
24687 register will be pushed onto the stack) we cannot go back on it. */
24688 if (cfun->machine->far_jump_used)
24689 return 1;
24690
24691 /* If this function is not being called from the prologue/epilogue
24692 generation code then it must be being called from the
24693 INITIAL_ELIMINATION_OFFSET macro. */
24694 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24695 {
24696 /* In this case we know that we are being asked about the elimination
24697 of the arg pointer register. If that register is not being used,
24698 then there are no arguments on the stack, and we do not have to
24699 worry that a far jump might force the prologue to push the link
24700 register, changing the stack offsets. In this case we can just
24701 return false, since the presence of far jumps in the function will
24702 not affect stack offsets.
24703
24704 If the arg pointer is live (or if it was live, but has now been
24705 eliminated and so set to dead) then we do have to test to see if
24706 the function might contain a far jump. This test can lead to some
24707 false negatives, since before reload is completed, the length of
24708 branch instructions is not known, so gcc defaults to returning their
24709 longest length, which in turn sets the far jump attribute to true.
24710
24711 A false negative will not result in bad code being generated, but it
24712 will result in a needless push and pop of the link register. We
24713 hope that this does not occur too often.
24714
24715 If we need doubleword stack alignment this could affect the other
24716 elimination offsets so we can't risk getting it wrong. */
24717 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24718 cfun->machine->arg_pointer_live = 1;
24719 else if (!cfun->machine->arg_pointer_live)
24720 return 0;
24721 }
24722
24723 /* We should not change far_jump_used during or after reload, as there is
24724 no chance to change stack frame layout. */
24725 if (reload_in_progress || reload_completed)
24726 return 0;
24727
24728 /* Check to see if the function contains a branch
24729 insn with the far jump attribute set. */
24730 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24731 {
24732 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24733 {
24734 far_jump = true;
24735 }
24736 func_size += get_attr_length (insn);
24737 }
24738
24739 /* The far_jump attribute will always be true for Thumb-1 before the
24740 branch shortening pass, so checking it before that pass is not
24741 very useful.
24742 
24743 The following heuristic tries to estimate more accurately whether a
24744 far jump will ultimately be needed.  The heuristic is very conservative,
24745 as there is no chance to roll back a decision not to use far jumps.
24746 
24747 The Thumb-1 long branch offset range is -2048 to 2046.  The worst case
24748 is that each 2-byte insn is associated with a 4-byte constant pool
24749 entry.  Using function size 2048/3 as the threshold is conservative enough. */
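/* Illustrative note (added): with this threshold, if any branch still has
   the far_jump attribute set and the function's insns total 683 bytes or
   more (683 * 3 >= 2048), far jumps are assumed and LR will be saved,
   even if shorten_branches would later prove all branches short.  */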
24750 if (far_jump)
24751 {
24752 if ((func_size * 3) >= 2048)
24753 {
24754 /* Record the fact that we have decided that
24755 the function does use far jumps. */
24756 cfun->machine->far_jump_used = 1;
24757 return 1;
24758 }
24759 }
24760
24761 return 0;
24762 }
24763
24764 /* Return nonzero if FUNC must be entered in ARM mode. */
24765 static bool
24766 is_called_in_ARM_mode (tree func)
24767 {
24768 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24769
24770 /* Ignore the problem of functions whose address is taken.  */
24771 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24772 return true;
24773
24774 #ifdef ARM_PE
24775 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24776 #else
24777 return false;
24778 #endif
24779 }
24780
24781 /* Given the stack offsets and register mask in OFFSETS, decide how
24782 many additional registers to push instead of subtracting a constant
24783 from SP. For epilogues the principle is the same except we use pop.
24784 FOR_PROLOGUE indicates which we're generating. */
24785 static int
24786 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24787 {
24788 HOST_WIDE_INT amount;
24789 unsigned long live_regs_mask = offsets->saved_regs_mask;
24790 /* Extract a mask of the ones we can give to the Thumb's push/pop
24791 instruction. */
24792 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24793 /* Then count how many other high registers will need to be pushed. */
24794 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24795 int n_free, reg_base, size;
24796
24797 if (!for_prologue && frame_pointer_needed)
24798 amount = offsets->locals_base - offsets->saved_regs;
24799 else
24800 amount = offsets->outgoing_args - offsets->saved_regs;
24801
24802 /* If the stack frame size is 512 exactly, we can save one load
24803 instruction, which should make this a win even when optimizing
24804 for speed. */
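/* Added note: the largest immediate a single Thumb-1 "sub sp, #imm" can
   encode is 508 (a 7-bit word offset), so when the frame is exactly 512
   bytes pushing one extra register brings the adjustment down to 508 and
   saves loading the constant into a register.  */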
24805 if (!optimize_size && amount != 512)
24806 return 0;
24807
24808 /* Can't do this if there are high registers to push. */
24809 if (high_regs_pushed != 0)
24810 return 0;
24811
24812 /* Shouldn't do it in the prologue if no registers would normally
24813 be pushed at all. In the epilogue, also allow it if we'll have
24814 a pop insn for the PC. */
24815 if (l_mask == 0
24816 && (for_prologue
24817 || TARGET_BACKTRACE
24818 || (live_regs_mask & 1 << LR_REGNUM) == 0
24819 || TARGET_INTERWORK
24820 || crtl->args.pretend_args_size != 0))
24821 return 0;
24822
24823 /* Don't do this if thumb_expand_prologue wants to emit instructions
24824 between the push and the stack frame allocation. */
24825 if (for_prologue
24826 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24827 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24828 return 0;
24829
24830 reg_base = 0;
24831 n_free = 0;
24832 if (!for_prologue)
24833 {
24834 size = arm_size_return_regs ();
24835 reg_base = ARM_NUM_INTS (size);
24836 live_regs_mask >>= reg_base;
24837 }
24838
24839 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24840 && (for_prologue || call_used_regs[reg_base + n_free]))
24841 {
24842 live_regs_mask >>= 1;
24843 n_free++;
24844 }
24845
24846 if (n_free == 0)
24847 return 0;
24848 gcc_assert (amount / 4 * 4 == amount);
24849
24850 if (amount >= 512 && (amount - n_free * 4) < 512)
24851 return (amount - 508) / 4;
24852 if (amount <= n_free * 4)
24853 return amount / 4;
24854 return 0;
24855 }
24856
24857 /* The bits which aren't usefully expanded as rtl. */
24858 const char *
24859 thumb1_unexpanded_epilogue (void)
24860 {
24861 arm_stack_offsets *offsets;
24862 int regno;
24863 unsigned long live_regs_mask = 0;
24864 int high_regs_pushed = 0;
24865 int extra_pop;
24866 int had_to_push_lr;
24867 int size;
24868
24869 if (cfun->machine->return_used_this_function != 0)
24870 return "";
24871
24872 if (IS_NAKED (arm_current_func_type ()))
24873 return "";
24874
24875 offsets = arm_get_frame_offsets ();
24876 live_regs_mask = offsets->saved_regs_mask;
24877 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24878
24879 /* We try to deduce the registers used from the function's return value.
24880 This is more reliable than examining df_regs_ever_live_p () because that
24881 will be set if the register is ever used in the function, not just if
24882 the register is used to hold a return value. */
24883 size = arm_size_return_regs ();
24884
24885 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24886 if (extra_pop > 0)
24887 {
24888 unsigned long extra_mask = (1 << extra_pop) - 1;
24889 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24890 }
24891
24892 /* The prologue may have pushed some high registers to use as
24893 work registers, e.g. the testsuite file:
24894 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24895 compiles to produce:
24896 push {r4, r5, r6, r7, lr}
24897 mov r7, r9
24898 mov r6, r8
24899 push {r6, r7}
24900 as part of the prologue. We have to undo that pushing here. */
24901
24902 if (high_regs_pushed)
24903 {
24904 unsigned long mask = live_regs_mask & 0xff;
24905 int next_hi_reg;
24906
24907 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
24908
24909 if (mask == 0)
24910 /* Oh dear! We have no low registers into which we can pop
24911 high registers! */
24912 internal_error
24913 ("no low registers available for popping high registers");
24914
24915 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24916 if (live_regs_mask & (1 << next_hi_reg))
24917 break;
24918
24919 while (high_regs_pushed)
24920 {
24921 /* Find lo register(s) into which the high register(s) can
24922 be popped. */
24923 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24924 {
24925 if (mask & (1 << regno))
24926 high_regs_pushed--;
24927 if (high_regs_pushed == 0)
24928 break;
24929 }
24930
24931 if (high_regs_pushed == 0 && regno >= 0)
24932 mask &= ~((1 << regno) - 1);
24933
24934 /* Pop the values into the low register(s). */
24935 thumb_pop (asm_out_file, mask);
24936
24937 /* Move the value(s) into the high registers. */
24938 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24939 {
24940 if (mask & (1 << regno))
24941 {
24942 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24943 regno);
24944
24945 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
24946 next_hi_reg--)
24947 if (live_regs_mask & (1 << next_hi_reg))
24948 break;
24949 }
24950 }
24951 }
24952 live_regs_mask &= ~0x0f00;
24953 }
24954
24955 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24956 live_regs_mask &= 0xff;
24957
24958 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24959 {
24960 /* Pop the return address into the PC. */
24961 if (had_to_push_lr)
24962 live_regs_mask |= 1 << PC_REGNUM;
24963
24964 /* Either no argument registers were pushed or a backtrace
24965 structure was created which includes an adjusted stack
24966 pointer, so just pop everything. */
24967 if (live_regs_mask)
24968 thumb_pop (asm_out_file, live_regs_mask);
24969
24970 /* We have either just popped the return address into the
24971 PC or it was kept in LR for the entire function.
24972 Note that thumb_pop has already called thumb_exit if the
24973 PC was in the list. */
24974 if (!had_to_push_lr)
24975 thumb_exit (asm_out_file, LR_REGNUM);
24976 }
24977 else
24978 {
24979 /* Pop everything but the return address. */
24980 if (live_regs_mask)
24981 thumb_pop (asm_out_file, live_regs_mask);
24982
24983 if (had_to_push_lr)
24984 {
24985 if (size > 12)
24986 {
24987 /* We have no free low regs, so save one. */
24988 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24989 LAST_ARG_REGNUM);
24990 }
24991
24992 /* Get the return address into a temporary register. */
24993 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24994
24995 if (size > 12)
24996 {
24997 /* Move the return address to lr. */
24998 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24999 LAST_ARG_REGNUM);
25000 /* Restore the low register. */
25001 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25002 IP_REGNUM);
25003 regno = LR_REGNUM;
25004 }
25005 else
25006 regno = LAST_ARG_REGNUM;
25007 }
25008 else
25009 regno = LR_REGNUM;
25010
25011 /* Remove the argument registers that were pushed onto the stack. */
25012 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25013 SP_REGNUM, SP_REGNUM,
25014 crtl->args.pretend_args_size);
25015
25016 thumb_exit (asm_out_file, regno);
25017 }
25018
25019 return "";
25020 }
25021
25022 /* Functions to save and restore machine-specific function data. */
25023 static struct machine_function *
25024 arm_init_machine_status (void)
25025 {
25026 struct machine_function *machine;
25027 machine = ggc_cleared_alloc<machine_function> ();
25028
25029 #if ARM_FT_UNKNOWN != 0
25030 machine->func_type = ARM_FT_UNKNOWN;
25031 #endif
25032 machine->static_chain_stack_bytes = -1;
25033 return machine;
25034 }
25035
25036 /* Return an RTX indicating where the return address to the
25037 calling function can be found. */
25038 rtx
25039 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25040 {
25041 if (count != 0)
25042 return NULL_RTX;
25043
25044 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25045 }
25046
25047 /* Do anything needed before RTL is emitted for each function. */
25048 void
25049 arm_init_expanders (void)
25050 {
25051 /* Arrange to initialize and mark the machine per-function status. */
25052 init_machine_status = arm_init_machine_status;
25053
25054 /* This is to stop the combine pass optimizing away the alignment
25055 adjustment of va_arg. */
25056 /* ??? It is claimed that this should not be necessary. */
25057 if (cfun)
25058 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25059 }
25060
25061 /* Return true if FUNC will be compiled in a different instruction set (ARM vs. Thumb) from the current mode. */
25062
25063 bool
25064 arm_change_mode_p (tree func)
25065 {
25066 if (TREE_CODE (func) != FUNCTION_DECL)
25067 return false;
25068
25069 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25070
25071 if (!callee_tree)
25072 callee_tree = target_option_default_node;
25073
25074 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25075 int flags = callee_opts->x_target_flags;
25076
25077 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25078 }
25079
25080 /* Like arm_compute_initial_elimination_offset. Simpler because there
25081 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25082 to point at the base of the local variables after static stack
25083 space for a function has been allocated. */
25084
25085 HOST_WIDE_INT
25086 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25087 {
25088 arm_stack_offsets *offsets;
25089
25090 offsets = arm_get_frame_offsets ();
25091
25092 switch (from)
25093 {
25094 case ARG_POINTER_REGNUM:
25095 switch (to)
25096 {
25097 case STACK_POINTER_REGNUM:
25098 return offsets->outgoing_args - offsets->saved_args;
25099
25100 case FRAME_POINTER_REGNUM:
25101 return offsets->soft_frame - offsets->saved_args;
25102
25103 case ARM_HARD_FRAME_POINTER_REGNUM:
25104 return offsets->saved_regs - offsets->saved_args;
25105
25106 case THUMB_HARD_FRAME_POINTER_REGNUM:
25107 return offsets->locals_base - offsets->saved_args;
25108
25109 default:
25110 gcc_unreachable ();
25111 }
25112 break;
25113
25114 case FRAME_POINTER_REGNUM:
25115 switch (to)
25116 {
25117 case STACK_POINTER_REGNUM:
25118 return offsets->outgoing_args - offsets->soft_frame;
25119
25120 case ARM_HARD_FRAME_POINTER_REGNUM:
25121 return offsets->saved_regs - offsets->soft_frame;
25122
25123 case THUMB_HARD_FRAME_POINTER_REGNUM:
25124 return offsets->locals_base - offsets->soft_frame;
25125
25126 default:
25127 gcc_unreachable ();
25128 }
25129 break;
25130
25131 default:
25132 gcc_unreachable ();
25133 }
25134 }
25135
25136 /* Generate the function's prologue. */
25137
25138 void
25139 thumb1_expand_prologue (void)
25140 {
25141 rtx_insn *insn;
25142
25143 HOST_WIDE_INT amount;
25144 HOST_WIDE_INT size;
25145 arm_stack_offsets *offsets;
25146 unsigned long func_type;
25147 int regno;
25148 unsigned long live_regs_mask;
25149 unsigned long l_mask;
25150 unsigned high_regs_pushed = 0;
25151 bool lr_needs_saving;
25152
25153 func_type = arm_current_func_type ();
25154
25155 /* Naked functions don't have prologues. */
25156 if (IS_NAKED (func_type))
25157 {
25158 if (flag_stack_usage_info)
25159 current_function_static_stack_size = 0;
25160 return;
25161 }
25162
25163 if (IS_INTERRUPT (func_type))
25164 {
25165 error ("interrupt service routines cannot be coded in Thumb mode");
25166 return;
25167 }
25168
25169 if (is_called_in_ARM_mode (current_function_decl))
25170 emit_insn (gen_prologue_thumb1_interwork ());
25171
25172 offsets = arm_get_frame_offsets ();
25173 live_regs_mask = offsets->saved_regs_mask;
25174 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25175
25176 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25177 l_mask = live_regs_mask & 0x40ff;
25178 /* Then count how many other high registers will need to be pushed. */
25179 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25180
25181 if (crtl->args.pretend_args_size)
25182 {
25183 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25184
25185 if (cfun->machine->uses_anonymous_args)
25186 {
25187 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25188 unsigned long mask;
25189
25190 mask = 1ul << (LAST_ARG_REGNUM + 1);
25191 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
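	  /* Worked example: with pretend_args_size == 8 this gives
	     num_pushes == 2 and mask == 0xc, i.e. a push of {r2, r3}.  */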
25192
25193 insn = thumb1_emit_multi_reg_push (mask, 0);
25194 }
25195 else
25196 {
25197 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25198 stack_pointer_rtx, x));
25199 }
25200 RTX_FRAME_RELATED_P (insn) = 1;
25201 }
25202
25203 if (TARGET_BACKTRACE)
25204 {
25205 HOST_WIDE_INT offset = 0;
25206 unsigned work_register;
25207 rtx work_reg, x, arm_hfp_rtx;
25208
25209 /* We have been asked to create a stack backtrace structure.
25210 The code looks like this:
25211
25212 0 .align 2
25213 0 func:
25214 0 sub SP, #16 Reserve space for 4 registers.
25215 2 push {R7} Push low registers.
25216 4 add R7, SP, #20 Get the stack pointer before the push.
25217 6 str R7, [SP, #8] Store the stack pointer
25218 (before reserving the space).
25219 8 mov R7, PC Get hold of the start of this code + 12.
25220 10 str R7, [SP, #16] Store it.
25221 12 mov R7, FP Get hold of the current frame pointer.
25222 14 str R7, [SP, #4] Store it.
25223 16 mov R7, LR Get hold of the current return address.
25224 18 str R7, [SP, #12] Store it.
25225 20 add R7, SP, #16 Point at the start of the
25226 backtrace structure.
25227 22 mov FP, R7 Put this value into the frame pointer. */
25228
25229 work_register = thumb_find_work_register (live_regs_mask);
25230 work_reg = gen_rtx_REG (SImode, work_register);
25231 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25232
25233 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25234 stack_pointer_rtx, GEN_INT (-16)));
25235 RTX_FRAME_RELATED_P (insn) = 1;
25236
25237 if (l_mask)
25238 {
25239 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25240 RTX_FRAME_RELATED_P (insn) = 1;
25241 lr_needs_saving = false;
25242
25243 offset = bit_count (l_mask) * UNITS_PER_WORD;
25244 }
25245
25246 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25247 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25248
25249 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25250 x = gen_frame_mem (SImode, x);
25251 emit_move_insn (x, work_reg);
25252
25253 /* Make sure that the instruction fetching the PC is in the right place
25254 to calculate "start of backtrace creation code + 12". */
25255 /* ??? The stores using the common WORK_REG ought to be enough to
25256 prevent the scheduler from doing anything weird. Failing that
25257 we could always move all of the following into an UNSPEC_VOLATILE. */
25258 if (l_mask)
25259 {
25260 x = gen_rtx_REG (SImode, PC_REGNUM);
25261 emit_move_insn (work_reg, x);
25262
25263 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25264 x = gen_frame_mem (SImode, x);
25265 emit_move_insn (x, work_reg);
25266
25267 emit_move_insn (work_reg, arm_hfp_rtx);
25268
25269 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25270 x = gen_frame_mem (SImode, x);
25271 emit_move_insn (x, work_reg);
25272 }
25273 else
25274 {
25275 emit_move_insn (work_reg, arm_hfp_rtx);
25276
25277 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25278 x = gen_frame_mem (SImode, x);
25279 emit_move_insn (x, work_reg);
25280
25281 x = gen_rtx_REG (SImode, PC_REGNUM);
25282 emit_move_insn (work_reg, x);
25283
25284 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25285 x = gen_frame_mem (SImode, x);
25286 emit_move_insn (x, work_reg);
25287 }
25288
25289 x = gen_rtx_REG (SImode, LR_REGNUM);
25290 emit_move_insn (work_reg, x);
25291
25292 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25293 x = gen_frame_mem (SImode, x);
25294 emit_move_insn (x, work_reg);
25295
25296 x = GEN_INT (offset + 12);
25297 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25298
25299 emit_move_insn (arm_hfp_rtx, work_reg);
25300 }
25301 /* Optimization: If we are not pushing any low registers but we are going
25302 to push some high registers then delay our first push. This will just
25303 be a push of LR and we can combine it with the push of the first high
25304 register. */
25305 else if ((l_mask & 0xff) != 0
25306 || (high_regs_pushed == 0 && lr_needs_saving))
25307 {
25308 unsigned long mask = l_mask;
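      /* thumb1_extra_regs_pushed presumably returns how many dead low
	 argument registers can be pushed as padding to absorb part of the
	 upcoming stack adjustment; (1 << N) - 1 selects r0..r(N-1).  */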
25309 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25310 insn = thumb1_emit_multi_reg_push (mask, mask);
25311 RTX_FRAME_RELATED_P (insn) = 1;
25312 lr_needs_saving = false;
25313 }
25314
25315 if (high_regs_pushed)
25316 {
25317 unsigned pushable_regs;
25318 unsigned next_hi_reg;
25319 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25320 : crtl->args.info.nregs;
25321 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
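      /* For instance, if two argument registers are live on entry,
	 arg_regs_mask == 0x3, i.e. r0 and r1.  */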
25322
25323 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25324 if (live_regs_mask & (1 << next_hi_reg))
25325 break;
25326
25327 /* Here we need to mask out registers used for passing arguments,
25328 even if they could otherwise be pushed. This is to avoid using
25329 them to stash the high registers, which would clobber the
25330 argument values they still hold. */
25331 pushable_regs = l_mask & (~arg_regs_mask);
25332 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25333
25334 /* Normally, LR can be used as a scratch register once it has been
25335 saved; but if the function examines its own return address then
25336 the value is still live and we need to avoid using it. */
25337 bool return_addr_live
25338 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
25339 LR_REGNUM);
25340
25341 if (lr_needs_saving || return_addr_live)
25342 pushable_regs &= ~(1 << LR_REGNUM);
25343
25344 if (pushable_regs == 0)
25345 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25346
25347 while (high_regs_pushed > 0)
25348 {
25349 unsigned long real_regs_mask = 0;
25350 unsigned long push_mask = 0;
25351
25352 for (regno = LR_REGNUM; regno >= 0; regno --)
25353 {
25354 if (pushable_regs & (1 << regno))
25355 {
25356 emit_move_insn (gen_rtx_REG (SImode, regno),
25357 gen_rtx_REG (SImode, next_hi_reg));
25358
25359 high_regs_pushed --;
25360 real_regs_mask |= (1 << next_hi_reg);
25361 push_mask |= (1 << regno);
25362
25363 if (high_regs_pushed)
25364 {
25365 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25366 next_hi_reg --)
25367 if (live_regs_mask & (1 << next_hi_reg))
25368 break;
25369 }
25370 else
25371 break;
25372 }
25373 }
25374
25375 /* If we had to find a work register and we have not yet
25376 saved the LR then add it to the list of regs to push. */
25377 if (lr_needs_saving)
25378 {
25379 push_mask |= 1 << LR_REGNUM;
25380 real_regs_mask |= 1 << LR_REGNUM;
25381 lr_needs_saving = false;
25382 /* If the return address is not live at this point, we
25383 can add LR to the list of registers that we can use
25384 for pushes. */
25385 if (!return_addr_live)
25386 pushable_regs |= 1 << LR_REGNUM;
25387 }
25388
25389 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25390 RTX_FRAME_RELATED_P (insn) = 1;
25391 }
25392 }
25393
25394 /* Load the pic register before setting the frame pointer,
25395 so we can use r7 as a temporary work register. */
25396 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25397 arm_load_pic_register (live_regs_mask, NULL_RTX);
25398
25399 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25400 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25401 stack_pointer_rtx);
25402
25403 size = offsets->outgoing_args - offsets->saved_args;
25404 if (flag_stack_usage_info)
25405 current_function_static_stack_size = size;
25406
25407 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25408 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25409 || flag_stack_clash_protection)
25410 && size)
25411 sorry ("%<-fstack-check=specific%> for Thumb-1");
25412
25413 amount = offsets->outgoing_args - offsets->saved_regs;
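  /* Registers folded into the earlier push by thumb1_extra_regs_pushed have
     already lowered SP by 4 bytes each, so reduce the explicit adjustment
     accordingly.  */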
25414 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25415 if (amount)
25416 {
25417 if (amount < 512)
25418 {
25419 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25420 GEN_INT (- amount)));
25421 RTX_FRAME_RELATED_P (insn) = 1;
25422 }
25423 else
25424 {
25425 rtx reg, dwarf;
25426
25427 /* The stack decrement is too big for an immediate value in a single
25428 insn. In theory we could issue multiple subtracts, but after
25429 three of them it becomes more space efficient to place the full
25430 value in the constant pool and load into a register. (Also the
25431 ARM debugger really likes to see only one stack decrement per
25432 function). So instead we look for a scratch register into which
25433 we can load the decrement, and then we subtract this from the
25434 stack pointer. Unfortunately, in Thumb mode the only available
25435 scratch registers are the argument registers, and we cannot use
25436 these as they may hold arguments to the function. Instead we
25437 attempt to locate a call preserved register which is used by this
25438 function. If we can find one, then we know that it will have
25439 been pushed at the start of the prologue and so we can corrupt
25440 it now. */
25441 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25442 if (live_regs_mask & (1 << regno))
25443 break;
25444
25445 gcc_assert (regno <= LAST_LO_REGNUM);
25446
25447 reg = gen_rtx_REG (SImode, regno);
25448
25449 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25450
25451 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25452 stack_pointer_rtx, reg));
25453
25454 dwarf = gen_rtx_SET (stack_pointer_rtx,
25455 plus_constant (Pmode, stack_pointer_rtx,
25456 -amount));
25457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25458 RTX_FRAME_RELATED_P (insn) = 1;
25459 }
25460 }
25461
25462 if (frame_pointer_needed)
25463 thumb_set_frame_pointer (offsets);
25464
25465 /* If we are profiling, make sure no instructions are scheduled before
25466 the call to mcount. Similarly if the user has requested no
25467 scheduling in the prolog. Similarly if we want non-call exceptions
25468 using the EABI unwinder, to prevent faulting instructions from being
25469 swapped with a stack adjustment. */
25470 if (crtl->profile || !TARGET_SCHED_PROLOG
25471 || (arm_except_unwind_info (&global_options) == UI_TARGET
25472 && cfun->can_throw_non_call_exceptions))
25473 emit_insn (gen_blockage ());
25474
25475 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25476 if (live_regs_mask & 0xff)
25477 cfun->machine->lr_save_eliminated = 0;
25478 }
25479
25480 /* Clear caller saved registers not used to pass return values and leaked
25481 condition flags before exiting a cmse_nonsecure_entry function. */
25482
25483 void
25484 cmse_nonsecure_entry_clear_before_return (void)
25485 {
25486 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25487 uint32_t padding_bits_to_clear = 0;
25488 auto_sbitmap to_clear_bitmap (maxregno + 1);
25489 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25490 tree result_type;
25491
25492 bitmap_clear (to_clear_bitmap);
25493 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25494 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25495
25496 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25497 registers. */
25498 if (TARGET_HARD_FLOAT)
25499 {
25500 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
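      /* float_bits covers s0-s15 (d0-d7), the VFP registers that may carry
	 floating-point arguments and return values under the AAPCS.  */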
25501
25502 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25503
25504 /* Make sure we don't clear the two scratch registers used to clear the
25505 relevant FPSCR bits in output_return_instruction. */
25506 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25507 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25508 emit_use (gen_rtx_REG (SImode, 4));
25509 bitmap_clear_bit (to_clear_bitmap, 4);
25510 }
25511
25512 /* If the user has defined registers to be caller saved, these are no longer
25513 restored by the function before returning and must thus be cleared for
25514 security purposes. */
25515 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25516 {
25517 /* We do not touch registers that can be used to pass arguments as per
25518 the AAPCS, since these should never be made callee-saved by user
25519 options. */
25520 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25521 continue;
25522 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25523 continue;
25524 if (call_used_regs[regno])
25525 bitmap_set_bit (to_clear_bitmap, regno);
25526 }
25527
25528 /* Make sure we do not clear the registers used to return the result in. */
25529 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25530 if (!VOID_TYPE_P (result_type))
25531 {
25532 uint64_t to_clear_return_mask;
25533 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25534
25535 /* No need to check that we return in registers, because we don't
25536 support returning on stack yet. */
25537 gcc_assert (REG_P (result_rtl));
25538 to_clear_return_mask
25539 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25540 &padding_bits_to_clear);
25541 if (to_clear_return_mask)
25542 {
25543 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25544 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25545 {
25546 if (to_clear_return_mask & (1ULL << regno))
25547 bitmap_clear_bit (to_clear_bitmap, regno);
25548 }
25549 }
25550 }
25551
25552 if (padding_bits_to_clear != 0)
25553 {
25554 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25555 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25556
25557 /* Padding_bits_to_clear is not 0, so we know we are dealing with
25558 returning a composite type, which only uses r0. Let's make sure that
25559 r1-r3 are cleared too. */
25560 bitmap_clear (to_clear_arg_regs_bitmap);
25561 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25562 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25563 }
25564
25565 /* Clear full registers that leak before returning. */
25566 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25567 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25568 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25569 clearing_reg);
25570 }
25571
25572 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25573 single POP instruction can be generated. LR should be replaced by PC.
25574 All the checks required are already done by USE_RETURN_INSN (). Hence,
25575 all we really need to check here is whether a single register or
25576 multiple registers are to be popped. */
25577 void
25578 thumb2_expand_return (bool simple_return)
25579 {
25580 int i, num_regs;
25581 unsigned long saved_regs_mask;
25582 arm_stack_offsets *offsets;
25583
25584 offsets = arm_get_frame_offsets ();
25585 saved_regs_mask = offsets->saved_regs_mask;
25586
25587 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25588 if (saved_regs_mask & (1 << i))
25589 num_regs++;
25590
25591 if (!simple_return && saved_regs_mask)
25592 {
25593 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25594 functions, or adapt the code to handle them according to the ACLE.
25595 This path should not be reachable for cmse_nonsecure_entry functions,
25596 but we prefer to assert it for now to ensure that future code changes
25597 do not silently change this behavior. */
25598 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25599 if (num_regs == 1)
25600 {
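	  /* Build (parallel [(return) (set (reg:SI PC) (mem (post_inc SP)))]),
	     i.e. a single "pop {pc}" that restores and returns at once.  */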
25601 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25602 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25603 rtx addr = gen_rtx_MEM (SImode,
25604 gen_rtx_POST_INC (SImode,
25605 stack_pointer_rtx));
25606 set_mem_alias_set (addr, get_frame_alias_set ());
25607 XVECEXP (par, 0, 0) = ret_rtx;
25608 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25609 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25610 emit_jump_insn (par);
25611 }
25612 else
25613 {
25614 saved_regs_mask &= ~ (1 << LR_REGNUM);
25615 saved_regs_mask |= (1 << PC_REGNUM);
25616 arm_emit_multi_reg_pop (saved_regs_mask);
25617 }
25618 }
25619 else
25620 {
25621 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25622 cmse_nonsecure_entry_clear_before_return ();
25623 emit_jump_insn (simple_return_rtx);
25624 }
25625 }
25626
25627 void
25628 thumb1_expand_epilogue (void)
25629 {
25630 HOST_WIDE_INT amount;
25631 arm_stack_offsets *offsets;
25632 int regno;
25633
25634 /* Naked functions don't have epilogues. */
25635 if (IS_NAKED (arm_current_func_type ()))
25636 return;
25637
25638 offsets = arm_get_frame_offsets ();
25639 amount = offsets->outgoing_args - offsets->saved_regs;
25640
25641 if (frame_pointer_needed)
25642 {
25643 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25644 amount = offsets->locals_base - offsets->saved_regs;
25645 }
25646 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25647
25648 gcc_assert (amount >= 0);
25649 if (amount)
25650 {
25651 emit_insn (gen_blockage ());
25652
25653 if (amount < 512)
25654 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25655 GEN_INT (amount)));
25656 else
25657 {
25658 /* r3 is always free in the epilogue. */
25659 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25660
25661 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25662 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25663 }
25664 }
25665
25666 /* Emit a USE (stack_pointer_rtx), so that
25667 the stack adjustment will not be deleted. */
25668 emit_insn (gen_force_register_use (stack_pointer_rtx));
25669
25670 if (crtl->profile || !TARGET_SCHED_PROLOG)
25671 emit_insn (gen_blockage ());
25672
25673 /* Emit a clobber for each insn that will be restored in the epilogue,
25674 so that flow2 will get register lifetimes correct. */
25675 for (regno = 0; regno < 13; regno++)
25676 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25677 emit_clobber (gen_rtx_REG (SImode, regno));
25678
25679 if (! df_regs_ever_live_p (LR_REGNUM))
25680 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25681
25682 /* Clear all caller-saved regs that are not used to return. */
25683 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25684 cmse_nonsecure_entry_clear_before_return ();
25685 }
25686
25687 /* Epilogue code for APCS frame. */
25688 static void
25689 arm_expand_epilogue_apcs_frame (bool really_return)
25690 {
25691 unsigned long func_type;
25692 unsigned long saved_regs_mask;
25693 int num_regs = 0;
25694 int i;
25695 int floats_from_frame = 0;
25696 arm_stack_offsets *offsets;
25697
25698 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25699 func_type = arm_current_func_type ();
25700
25701 /* Get frame offsets for ARM. */
25702 offsets = arm_get_frame_offsets ();
25703 saved_regs_mask = offsets->saved_regs_mask;
25704
25705 /* Find the offset of the floating-point save area in the frame. */
25706 floats_from_frame
25707 = (offsets->saved_args
25708 + arm_compute_static_chain_stack_bytes ()
25709 - offsets->frame);
25710
25711 /* Compute how many core registers are saved and how far away the floats are. */
25712 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25713 if (saved_regs_mask & (1 << i))
25714 {
25715 num_regs++;
25716 floats_from_frame += 4;
25717 }
25718
25719 if (TARGET_HARD_FLOAT)
25720 {
25721 int start_reg;
25722 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25723
25724 /* The offset is from IP_REGNUM. */
25725 int saved_size = arm_get_vfp_saved_size ();
25726 if (saved_size > 0)
25727 {
25728 rtx_insn *insn;
25729 floats_from_frame += saved_size;
25730 insn = emit_insn (gen_addsi3 (ip_rtx,
25731 hard_frame_pointer_rtx,
25732 GEN_INT (-floats_from_frame)));
25733 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25734 ip_rtx, hard_frame_pointer_rtx);
25735 }
25736
25737 /* Generate VFP register multi-pop. */
25738 start_reg = FIRST_VFP_REGNUM;
25739
25740 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25741 /* Look for a case where a reg does not need restoring. */
25742 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25743 && (!df_regs_ever_live_p (i + 1)
25744 || call_used_regs[i + 1]))
25745 {
25746 if (start_reg != i)
25747 arm_emit_vfp_multi_reg_pop (start_reg,
25748 (i - start_reg) / 2,
25749 gen_rtx_REG (SImode,
25750 IP_REGNUM));
25751 start_reg = i + 2;
25752 }
25753
25754 /* Restore the remaining regs that we have discovered (or possibly
25755 even all of them, if the conditional in the for loop never
25756 fired). */
25757 if (start_reg != i)
25758 arm_emit_vfp_multi_reg_pop (start_reg,
25759 (i - start_reg) / 2,
25760 gen_rtx_REG (SImode, IP_REGNUM));
25761 }
25762
25763 if (TARGET_IWMMXT)
25764 {
25765 /* The frame pointer is guaranteed to be non-double-word aligned, as
25766 it is set to double-word-aligned old_stack_pointer - 4. */
25767 rtx_insn *insn;
25768 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25769
25770 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25771 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25772 {
25773 rtx addr = gen_frame_mem (V2SImode,
25774 plus_constant (Pmode, hard_frame_pointer_rtx,
25775 - lrm_count * 4));
25776 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25777 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25778 gen_rtx_REG (V2SImode, i),
25779 NULL_RTX);
25780 lrm_count += 2;
25781 }
25782 }
25783
25784 /* saved_regs_mask should contain IP, which holds the old stack pointer
25785 saved at the time the frame was created. Since SP and IP are adjacent
25786 registers, we can restore the value directly into SP. */
25787 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25788 saved_regs_mask &= ~(1 << IP_REGNUM);
25789 saved_regs_mask |= (1 << SP_REGNUM);
25790
25791 /* There are two registers left in saved_regs_mask - LR and PC. We
25792 only need to restore LR (the return address), but to
25793 save time we can load it directly into PC, unless we need a
25794 special function exit sequence, or we are not really returning. */
25795 if (really_return
25796 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25797 && !crtl->calls_eh_return)
25798 /* Delete LR from the register mask, so that LR on
25799 the stack is loaded into the PC in the register mask. */
25800 saved_regs_mask &= ~(1 << LR_REGNUM);
25801 else
25802 saved_regs_mask &= ~(1 << PC_REGNUM);
25803
25804 num_regs = bit_count (saved_regs_mask);
25805 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25806 {
25807 rtx_insn *insn;
25808 emit_insn (gen_blockage ());
25809 /* Unwind the stack to just below the saved registers. */
25810 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25811 hard_frame_pointer_rtx,
25812 GEN_INT (- 4 * num_regs)));
25813
25814 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25815 stack_pointer_rtx, hard_frame_pointer_rtx);
25816 }
25817
25818 arm_emit_multi_reg_pop (saved_regs_mask);
25819
25820 if (IS_INTERRUPT (func_type))
25821 {
25822 /* Interrupt handlers will have pushed the
25823 IP onto the stack, so restore it now. */
25824 rtx_insn *insn;
25825 rtx addr = gen_rtx_MEM (SImode,
25826 gen_rtx_POST_INC (SImode,
25827 stack_pointer_rtx));
25828 set_mem_alias_set (addr, get_frame_alias_set ());
25829 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25830 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25831 gen_rtx_REG (SImode, IP_REGNUM),
25832 NULL_RTX);
25833 }
25834
25835 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25836 return;
25837
25838 if (crtl->calls_eh_return)
25839 emit_insn (gen_addsi3 (stack_pointer_rtx,
25840 stack_pointer_rtx,
25841 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25842
25843 if (IS_STACKALIGN (func_type))
25844 /* Restore the original stack pointer. Before prologue, the stack was
25845 realigned and the original stack pointer saved in r0. For details,
25846 see comment in arm_expand_prologue. */
25847 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25848
25849 emit_jump_insn (simple_return_rtx);
25850 }
25851
25852 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25853 function is not a sibcall. */
25854 void
25855 arm_expand_epilogue (bool really_return)
25856 {
25857 unsigned long func_type;
25858 unsigned long saved_regs_mask;
25859 int num_regs = 0;
25860 int i;
25861 int amount;
25862 arm_stack_offsets *offsets;
25863
25864 func_type = arm_current_func_type ();
25865
25866 /* Naked functions don't have epilogues. Hence, generate a return pattern
25867 and let output_return_instruction take care of any instruction emission. */
25868 if (IS_NAKED (func_type)
25869 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25870 {
25871 if (really_return)
25872 emit_jump_insn (simple_return_rtx);
25873 return;
25874 }
25875
25876 /* If we are throwing an exception, then we really must be doing a
25877 return, so we can't tail-call. */
25878 gcc_assert (!crtl->calls_eh_return || really_return);
25879
25880 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25881 {
25882 arm_expand_epilogue_apcs_frame (really_return);
25883 return;
25884 }
25885
25886 /* Get frame offsets for ARM. */
25887 offsets = arm_get_frame_offsets ();
25888 saved_regs_mask = offsets->saved_regs_mask;
25889 num_regs = bit_count (saved_regs_mask);
25890
25891 if (frame_pointer_needed)
25892 {
25893 rtx_insn *insn;
25894 /* Restore stack pointer if necessary. */
25895 if (TARGET_ARM)
25896 {
25897 /* In ARM mode, the frame pointer points to the first saved register.
25898 Restore the stack pointer to the last saved register. */
25899 amount = offsets->frame - offsets->saved_regs;
25900
25901 /* Force out any pending memory operations that reference stacked data
25902 before stack de-allocation occurs. */
25903 emit_insn (gen_blockage ());
25904 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25905 hard_frame_pointer_rtx,
25906 GEN_INT (amount)));
25907 arm_add_cfa_adjust_cfa_note (insn, amount,
25908 stack_pointer_rtx,
25909 hard_frame_pointer_rtx);
25910
25911 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25912 deleted. */
25913 emit_insn (gen_force_register_use (stack_pointer_rtx));
25914 }
25915 else
25916 {
25917 /* In Thumb-2 mode, the frame pointer points to the last saved
25918 register. */
25919 amount = offsets->locals_base - offsets->saved_regs;
25920 if (amount)
25921 {
25922 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25923 hard_frame_pointer_rtx,
25924 GEN_INT (amount)));
25925 arm_add_cfa_adjust_cfa_note (insn, amount,
25926 hard_frame_pointer_rtx,
25927 hard_frame_pointer_rtx);
25928 }
25929
25930 /* Force out any pending memory operations that reference stacked data
25931 before stack de-allocation occurs. */
25932 emit_insn (gen_blockage ());
25933 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25934 hard_frame_pointer_rtx));
25935 arm_add_cfa_adjust_cfa_note (insn, 0,
25936 stack_pointer_rtx,
25937 hard_frame_pointer_rtx);
25938 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25939 deleted. */
25940 emit_insn (gen_force_register_use (stack_pointer_rtx));
25941 }
25942 }
25943 else
25944 {
25945 /* Pop off outgoing args and local frame to adjust stack pointer to
25946 last saved register. */
25947 amount = offsets->outgoing_args - offsets->saved_regs;
25948 if (amount)
25949 {
25950 rtx_insn *tmp;
25951 /* Force out any pending memory operations that reference stacked data
25952 before stack de-allocation occurs. */
25953 emit_insn (gen_blockage ());
25954 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25955 stack_pointer_rtx,
25956 GEN_INT (amount)));
25957 arm_add_cfa_adjust_cfa_note (tmp, amount,
25958 stack_pointer_rtx, stack_pointer_rtx);
25959 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25960 not deleted. */
25961 emit_insn (gen_force_register_use (stack_pointer_rtx));
25962 }
25963 }
25964
25965 if (TARGET_HARD_FLOAT)
25966 {
25967 /* Generate VFP register multi-pop. */
25968 int end_reg = LAST_VFP_REGNUM + 1;
25969
25970 /* Scan the registers in reverse order. We need to match
25971 any groupings made in the prologue and generate matching
25972 vldm operations. The need to match groups is because,
25973 unlike pop, vldm can only do consecutive regs. */
25974 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25975 /* Look for a case where a reg does not need restoring. */
25976 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25977 && (!df_regs_ever_live_p (i + 1)
25978 || call_used_regs[i + 1]))
25979 {
25980 /* Restore the regs discovered so far (from reg+2 to
25981 end_reg). */
25982 if (end_reg > i + 2)
25983 arm_emit_vfp_multi_reg_pop (i + 2,
25984 (end_reg - (i + 2)) / 2,
25985 stack_pointer_rtx);
25986 end_reg = i;
25987 }
25988
25989 /* Restore the remaining regs that we have discovered (or possibly
25990 even all of them, if the conditional in the for loop never
25991 fired). */
25992 if (end_reg > i + 2)
25993 arm_emit_vfp_multi_reg_pop (i + 2,
25994 (end_reg - (i + 2)) / 2,
25995 stack_pointer_rtx);
25996 }
25997
25998 if (TARGET_IWMMXT)
25999 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26000 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26001 {
26002 rtx_insn *insn;
26003 rtx addr = gen_rtx_MEM (V2SImode,
26004 gen_rtx_POST_INC (SImode,
26005 stack_pointer_rtx));
26006 set_mem_alias_set (addr, get_frame_alias_set ());
26007 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26008 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26009 gen_rtx_REG (V2SImode, i),
26010 NULL_RTX);
26011 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26012 stack_pointer_rtx, stack_pointer_rtx);
26013 }
26014
26015 if (saved_regs_mask)
26016 {
26017 rtx insn;
26018 bool return_in_pc = false;
26019
26020 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26021 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26022 && !IS_CMSE_ENTRY (func_type)
26023 && !IS_STACKALIGN (func_type)
26024 && really_return
26025 && crtl->args.pretend_args_size == 0
26026 && saved_regs_mask & (1 << LR_REGNUM)
26027 && !crtl->calls_eh_return)
26028 {
26029 saved_regs_mask &= ~(1 << LR_REGNUM);
26030 saved_regs_mask |= (1 << PC_REGNUM);
26031 return_in_pc = true;
26032 }
26033
26034 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26035 {
26036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26037 if (saved_regs_mask & (1 << i))
26038 {
26039 rtx addr = gen_rtx_MEM (SImode,
26040 gen_rtx_POST_INC (SImode,
26041 stack_pointer_rtx));
26042 set_mem_alias_set (addr, get_frame_alias_set ());
26043
26044 if (i == PC_REGNUM)
26045 {
26046 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26047 XVECEXP (insn, 0, 0) = ret_rtx;
26048 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
26049 addr);
26050 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26051 insn = emit_jump_insn (insn);
26052 }
26053 else
26054 {
26055 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26056 addr));
26057 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26058 gen_rtx_REG (SImode, i),
26059 NULL_RTX);
26060 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26061 stack_pointer_rtx,
26062 stack_pointer_rtx);
26063 }
26064 }
26065 }
26066 else
26067 {
26068 if (TARGET_LDRD
26069 && current_tune->prefer_ldrd_strd
26070 && !optimize_function_for_size_p (cfun))
26071 {
26072 if (TARGET_THUMB2)
26073 thumb2_emit_ldrd_pop (saved_regs_mask);
26074 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26075 arm_emit_ldrd_pop (saved_regs_mask);
26076 else
26077 arm_emit_multi_reg_pop (saved_regs_mask);
26078 }
26079 else
26080 arm_emit_multi_reg_pop (saved_regs_mask);
26081 }
26082
26083 if (return_in_pc)
26084 return;
26085 }
26086
26087 amount
26088 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
26089 if (amount)
26090 {
26091 int i, j;
26092 rtx dwarf = NULL_RTX;
26093 rtx_insn *tmp =
26094 emit_insn (gen_addsi3 (stack_pointer_rtx,
26095 stack_pointer_rtx,
26096 GEN_INT (amount)));
26097
26098 RTX_FRAME_RELATED_P (tmp) = 1;
26099
26100 if (cfun->machine->uses_anonymous_args)
26101 {
26102 /* Restore pretend args. Refer to arm_expand_prologue for how the
26103 pretend args were saved on the stack. */
26104 int num_regs = crtl->args.pretend_args_size / 4;
26105 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
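	  /* For example, pretend_args_size == 8 gives num_regs == 2 and a
	     mask of 0xc, i.e. r2 and r3, matching how the prologue pushed
	     them.  */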
26106 for (j = 0, i = 0; j < num_regs; i++)
26107 if (saved_regs_mask & (1 << i))
26108 {
26109 rtx reg = gen_rtx_REG (SImode, i);
26110 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26111 j++;
26112 }
26113 REG_NOTES (tmp) = dwarf;
26114 }
26115 arm_add_cfa_adjust_cfa_note (tmp, amount,
26116 stack_pointer_rtx, stack_pointer_rtx);
26117 }
26118
26119 /* Clear all caller-saved regs that are not used to return. */
26120 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26121 {
26122 /* CMSE_ENTRY always returns. */
26123 gcc_assert (really_return);
26124 cmse_nonsecure_entry_clear_before_return ();
26125 }
26126
26127 if (!really_return)
26128 return;
26129
26130 if (crtl->calls_eh_return)
26131 emit_insn (gen_addsi3 (stack_pointer_rtx,
26132 stack_pointer_rtx,
26133 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26134
26135 if (IS_STACKALIGN (func_type))
26136 /* Restore the original stack pointer. Before prologue, the stack was
26137 realigned and the original stack pointer saved in r0. For details,
26138 see comment in arm_expand_prologue. */
26139 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26140
26141 emit_jump_insn (simple_return_rtx);
26142 }
26143
26144 /* Implementation of insn prologue_thumb1_interwork. This is the first
26145 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26146
26147 const char *
26148 thumb1_output_interwork (void)
26149 {
26150 const char * name;
26151 FILE *f = asm_out_file;
26152
26153 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26154 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26155 == SYMBOL_REF);
26156 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26157
26158 /* Generate code sequence to switch us into Thumb mode. */
26159 /* The .code 32 directive has already been emitted by
26160 ASM_DECLARE_FUNCTION_NAME. */
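  /* In ARM state the PC reads as the address of the current instruction
     plus 8, which here is where the Thumb stub emitted below begins; setting
     bit 0 of that address and branching with BX switches the core into
     Thumb state.  */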
26161 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26162 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26163
26164 /* Generate a label, so that the debugger will notice the
26165 change in instruction sets. This label is also used by
26166 the assembler to bypass the ARM code when this function
26167 is called from a Thumb encoded function elsewhere in the
26168 same file. Hence the definition of STUB_NAME here must
26169 agree with the definition in gas/config/tc-arm.c. */
26170
26171 #define STUB_NAME ".real_start_of"
26172
26173 fprintf (f, "\t.code\t16\n");
26174 #ifdef ARM_PE
26175 if (arm_dllexport_name_p (name))
26176 name = arm_strip_name_encoding (name);
26177 #endif
26178 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26179 fprintf (f, "\t.thumb_func\n");
26180 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26181
26182 return "";
26183 }
26184
26185 /* Handle the case of a double word load into a low register from
26186 a computed memory address. The computed address may involve a
26187 register which is overwritten by the load. */
26188 const char *
26189 thumb_load_double_from_address (rtx *operands)
26190 {
26191 rtx addr;
26192 rtx base;
26193 rtx offset;
26194 rtx arg1;
26195 rtx arg2;
26196
26197 gcc_assert (REG_P (operands[0]));
26198 gcc_assert (MEM_P (operands[1]));
26199
26200 /* Get the memory address. */
26201 addr = XEXP (operands[1], 0);
26202
26203 /* Work out how the memory address is computed. */
26204 switch (GET_CODE (addr))
26205 {
26206 case REG:
26207 operands[2] = adjust_address (operands[1], SImode, 4);
26208
26209 if (REGNO (operands[0]) == REGNO (addr))
26210 {
26211 output_asm_insn ("ldr\t%H0, %2", operands);
26212 output_asm_insn ("ldr\t%0, %1", operands);
26213 }
26214 else
26215 {
26216 output_asm_insn ("ldr\t%0, %1", operands);
26217 output_asm_insn ("ldr\t%H0, %2", operands);
26218 }
26219 break;
26220
26221 case CONST:
26222 /* Compute <address> + 4 for the high order load. */
26223 operands[2] = adjust_address (operands[1], SImode, 4);
26224
26225 output_asm_insn ("ldr\t%0, %1", operands);
26226 output_asm_insn ("ldr\t%H0, %2", operands);
26227 break;
26228
26229 case PLUS:
26230 arg1 = XEXP (addr, 0);
26231 arg2 = XEXP (addr, 1);
26232
26233 if (CONSTANT_P (arg1))
26234 base = arg2, offset = arg1;
26235 else
26236 base = arg1, offset = arg2;
26237
26238 gcc_assert (REG_P (base));
26239
26240 /* Catch the case of <address> = <reg> + <reg>. */
26241 if (REG_P (offset))
26242 {
26243 int reg_offset = REGNO (offset);
26244 int reg_base = REGNO (base);
26245 int reg_dest = REGNO (operands[0]);
26246
26247 /* Add the base and offset registers together into the
26248 higher destination register. */
26249 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26250 reg_dest + 1, reg_base, reg_offset);
26251
26252 /* Load the lower destination register from the address in
26253 the higher destination register. */
26254 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26255 reg_dest, reg_dest + 1);
26256
26257 /* Load the higher destination register from its own address
26258 plus 4. */
26259 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26260 reg_dest + 1, reg_dest + 1);
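	    /* For instance, loading the double word at [r1 + r2] into r4/r5
	       emits: add r5, r1, r2; ldr r4, [r5, #0]; ldr r5, [r5, #4].  */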
26261 }
26262 else
26263 {
26264 /* Compute <address> + 4 for the high order load. */
26265 operands[2] = adjust_address (operands[1], SImode, 4);
26266
26267 /* If the computed address is held in the low order register
26268 then load the high order register first, otherwise always
26269 load the low order register first. */
26270 if (REGNO (operands[0]) == REGNO (base))
26271 {
26272 output_asm_insn ("ldr\t%H0, %2", operands);
26273 output_asm_insn ("ldr\t%0, %1", operands);
26274 }
26275 else
26276 {
26277 output_asm_insn ("ldr\t%0, %1", operands);
26278 output_asm_insn ("ldr\t%H0, %2", operands);
26279 }
26280 }
26281 break;
26282
26283 case LABEL_REF:
26284 /* With no registers to worry about we can just load the value
26285 directly. */
26286 operands[2] = adjust_address (operands[1], SImode, 4);
26287
26288 output_asm_insn ("ldr\t%H0, %2", operands);
26289 output_asm_insn ("ldr\t%0, %1", operands);
26290 break;
26291
26292 default:
26293 gcc_unreachable ();
26294 }
26295
26296 return "";
26297 }
26298
26299 const char *
26300 thumb_output_move_mem_multiple (int n, rtx *operands)
26301 {
26302 switch (n)
26303 {
26304 case 2:
26305 if (REGNO (operands[4]) > REGNO (operands[5]))
26306 std::swap (operands[4], operands[5]);
26307
26308 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26309 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26310 break;
26311
26312 case 3:
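      /* The three swaps below sort operands[4..6] into ascending register
	 order, as required by the ldmia/stmia register lists.  */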
26313 if (REGNO (operands[4]) > REGNO (operands[5]))
26314 std::swap (operands[4], operands[5]);
26315 if (REGNO (operands[5]) > REGNO (operands[6]))
26316 std::swap (operands[5], operands[6]);
26317 if (REGNO (operands[4]) > REGNO (operands[5]))
26318 std::swap (operands[4], operands[5]);
26319
26320 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26321 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26322 break;
26323
26324 default:
26325 gcc_unreachable ();
26326 }
26327
26328 return "";
26329 }
26330
26331 /* Output a call-via instruction for thumb state. */
26332 const char *
26333 thumb_call_via_reg (rtx reg)
26334 {
26335 int regno = REGNO (reg);
26336 rtx *labelp;
26337
26338 gcc_assert (regno < LR_REGNUM);
26339
26340 /* If we are in the normal text section we can use a single instance
26341 per compilation unit. If we are doing function sections, then we need
26342 an entry per section, since we can't rely on reachability. */
26343 if (in_section == text_section)
26344 {
26345 thumb_call_reg_needed = 1;
26346
26347 if (thumb_call_via_label[regno] == NULL)
26348 thumb_call_via_label[regno] = gen_label_rtx ();
26349 labelp = thumb_call_via_label + regno;
26350 }
26351 else
26352 {
26353 if (cfun->machine->call_via[regno] == NULL)
26354 cfun->machine->call_via[regno] = gen_label_rtx ();
26355 labelp = cfun->machine->call_via + regno;
26356 }
26357
26358 output_asm_insn ("bl\t%a0", labelp);
26359 return "";
26360 }
26361
26362 /* Routines for generating rtl. */
26363 void
26364 thumb_expand_cpymemqi (rtx *operands)
26365 {
26366 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26367 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26368 HOST_WIDE_INT len = INTVAL (operands[2]);
26369 HOST_WIDE_INT offset = 0;
26370
26371 while (len >= 12)
26372 {
26373 emit_insn (gen_cpymem12b (out, in, out, in));
26374 len -= 12;
26375 }
26376
26377 if (len >= 8)
26378 {
26379 emit_insn (gen_cpymem8b (out, in, out, in));
26380 len -= 8;
26381 }
26382
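  /* The cpymem12b/cpymem8b expanders take OUT and IN as both destinations
     and sources, so they are expected to advance the pointers themselves;
     OFFSET therefore only tracks the word/halfword/byte tail copies below.  */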
26383 if (len >= 4)
26384 {
26385 rtx reg = gen_reg_rtx (SImode);
26386 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26387 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26388 len -= 4;
26389 offset += 4;
26390 }
26391
26392 if (len >= 2)
26393 {
26394 rtx reg = gen_reg_rtx (HImode);
26395 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26396 plus_constant (Pmode, in,
26397 offset))));
26398 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26399 offset)),
26400 reg));
26401 len -= 2;
26402 offset += 2;
26403 }
26404
26405 if (len)
26406 {
26407 rtx reg = gen_reg_rtx (QImode);
26408 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26409 plus_constant (Pmode, in,
26410 offset))));
26411 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26412 offset)),
26413 reg));
26414 }
26415 }
26416
26417 void
26418 thumb_reload_out_hi (rtx *operands)
26419 {
26420 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26421 }
26422
26423 /* Return the length of a function name prefix
26424 that starts with the character C. */
26425 static int
26426 arm_get_strip_length (int c)
26427 {
26428 switch (c)
26429 {
26430 ARM_NAME_ENCODING_LENGTHS
26431 default: return 0;
26432 }
26433 }
26434
26435 /* Return a pointer to a function's name with any
26436 and all prefix encodings stripped from it. */
26437 const char *
26438 arm_strip_name_encoding (const char *name)
26439 {
26440 int skip;
26441
26442 while ((skip = arm_get_strip_length (* name)))
26443 name += skip;
26444
26445 return name;
26446 }
26447
26448 /* If there is a '*' anywhere in the name's prefix, then
26449 emit the stripped name verbatim, otherwise prepend an
26450 underscore if leading underscores are being used. */
26451 void
26452 arm_asm_output_labelref (FILE *stream, const char *name)
26453 {
26454 int skip;
26455 int verbatim = 0;
26456
26457 while ((skip = arm_get_strip_length (* name)))
26458 {
26459 verbatim |= (*name == '*');
26460 name += skip;
26461 }
26462
26463 if (verbatim)
26464 fputs (name, stream);
26465 else
26466 asm_fprintf (stream, "%U%s", name);
26467 }
26468
26469 /* This function is used to emit an EABI tag and its associated value.
26470 We emit the numerical value of the tag in case the assembler does not
26471 support textual tags (e.g. gas prior to 2.20). If requested we include
26472 the tag name in a comment so that anyone reading the assembler output
26473 will know which tag is being set.
26474
26475 This function is not static because arm-c.c needs it too. */
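/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1) would
   emit something like
	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding
   under -fverbose-asm, assuming '@' is the assembler comment character.  */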
26476
26477 void
26478 arm_emit_eabi_attribute (const char *name, int num, int val)
26479 {
26480 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26481 if (flag_verbose_asm || flag_debug_asm)
26482 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26483 asm_fprintf (asm_out_file, "\n");
26484 }
26485
26486 /* This function is used to print CPU tuning information as comment
26487 in assembler file. Pointers are not printed for now. */
26488
26489 void
26490 arm_print_tune_info (void)
26491 {
26492 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26493 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26494 current_tune->constant_limit);
26495 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26496 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26497 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26498 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26500 "prefetch.l1_cache_size:\t%d\n",
26501 current_tune->prefetch.l1_cache_size);
26502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26503 "prefetch.l1_cache_line_size:\t%d\n",
26504 current_tune->prefetch.l1_cache_line_size);
26505 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26506 "prefer_constant_pool:\t%d\n",
26507 (int) current_tune->prefer_constant_pool);
26508 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26509 "branch_cost:\t(s:speed, p:predictable)\n");
26510 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26511 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26512 current_tune->branch_cost (false, false));
26513 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26514 current_tune->branch_cost (false, true));
26515 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26516 current_tune->branch_cost (true, false));
26517 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26518 current_tune->branch_cost (true, true));
26519 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26520 "prefer_ldrd_strd:\t%d\n",
26521 (int) current_tune->prefer_ldrd_strd);
26522 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26523 "logical_op_non_short_circuit:\t[%d,%d]\n",
26524 (int) current_tune->logical_op_non_short_circuit_thumb,
26525 (int) current_tune->logical_op_non_short_circuit_arm);
26526 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26527 "prefer_neon_for_64bits:\t%d\n",
26528 (int) current_tune->prefer_neon_for_64bits);
26529 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26530 "disparage_flag_setting_t16_encodings:\t%d\n",
26531 (int) current_tune->disparage_flag_setting_t16_encodings);
26532 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26533 "string_ops_prefer_neon:\t%d\n",
26534 (int) current_tune->string_ops_prefer_neon);
26535 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26536 "max_insns_inline_memset:\t%d\n",
26537 current_tune->max_insns_inline_memset);
26538 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26539 current_tune->fusible_ops);
26540 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26541 (int) current_tune->sched_autopref);
26542 }
26543
26544 /* Print .arch and .arch_extension directives corresponding to the
26545 current architecture configuration. */
26546 static void
26547 arm_print_asm_arch_directives ()
26548 {
26549 const arch_option *arch
26550 = arm_parse_arch_option_name (all_architectures, "-march",
26551 arm_active_target.arch_name);
26552 auto_sbitmap opt_bits (isa_num_bits);
26553
26554 gcc_assert (arch);
26555
26556 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26557 arm_last_printed_arch_string = arm_active_target.arch_name;
26558 if (!arch->common.extensions)
26559 return;
26560
26561 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26562 opt->name != NULL;
26563 opt++)
26564 {
26565 if (!opt->remove)
26566 {
26567 arm_initialize_isa (opt_bits, opt->isa_bits);
26568
26569 /* If every feature bit of this option is set in the target
26570 ISA specification, print out the option name. However,
26571 don't print anything if all the bits are part of the
26572 FPU specification. */
26573 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26574 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26575 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26576 }
26577 }
26578 }
26579
26580 static void
26581 arm_file_start (void)
26582 {
26583 int val;
26584
26585 if (TARGET_BPABI)
26586 {
26587 /* We don't have a specified CPU. Use the architecture to
26588 generate the tags.
26589
26590 Note: it might be better to do this unconditionally, then the
26591 assembler would not need to know about all new CPU names as
26592 they are added. */
26593 if (!arm_active_target.core_name)
26594 {
26595 /* armv7ve doesn't support any extensions. */
26596 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26597 {
26598 /* Keep backward compatibility for assemblers
26599 which don't support armv7ve. */
26600 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26601 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26602 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26603 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26604 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26605 arm_last_printed_arch_string = "armv7ve";
26606 }
26607 else
26608 arm_print_asm_arch_directives ();
26609 }
26610 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26611 {
26612 asm_fprintf (asm_out_file, "\t.arch %s\n",
26613 arm_active_target.core_name + 8);
26614 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26615 }
26616 else
26617 {
26618 const char* truncated_name
26619 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26620 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26621 }
26622
26623 if (print_tune_info)
26624 arm_print_tune_info ();
26625
26626 if (! TARGET_SOFT_FLOAT)
26627 {
26628 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26629 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26630
26631 if (TARGET_HARD_FLOAT_ABI)
26632 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26633 }
26634
26635 /* Some of these attributes only apply when the corresponding features
26636 are used. However we don't have any easy way of figuring this out.
26637 Conservatively record the setting that would have been used. */
26638
26639 if (flag_rounding_math)
26640 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26641
26642 if (!flag_unsafe_math_optimizations)
26643 {
26644 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26645 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26646 }
26647 if (flag_signaling_nans)
26648 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26649
26650 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26651 flag_finite_math_only ? 1 : 3);
26652
26653 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26654 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26655 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26656 flag_short_enums ? 1 : 2);
26657
26658 /* Tag_ABI_optimization_goals. */
26659 if (optimize_size)
26660 val = 4;
26661 else if (optimize >= 2)
26662 val = 2;
26663 else if (optimize)
26664 val = 1;
26665 else
26666 val = 6;
26667 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26668
26669 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26670 unaligned_access);
26671
26672 if (arm_fp16_format)
26673 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26674 (int) arm_fp16_format);
26675
26676 if (arm_lang_output_object_attributes_hook)
26677 arm_lang_output_object_attributes_hook();
26678 }
26679
26680 default_file_start ();
26681 }
26682
26683 static void
26684 arm_file_end (void)
26685 {
26686 int regno;
26687
26688 if (NEED_INDICATE_EXEC_STACK)
26689 /* Add .note.GNU-stack. */
26690 file_end_indicate_exec_stack ();
26691
26692 if (! thumb_call_reg_needed)
26693 return;
26694
26695 switch_to_section (text_section);
26696 asm_fprintf (asm_out_file, "\t.code 16\n");
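  /* Align to 2 bytes (2^1), the minimum alignment required for Thumb code.  */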
26697 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26698
26699 for (regno = 0; regno < LR_REGNUM; regno++)
26700 {
26701 rtx label = thumb_call_via_label[regno];
26702
26703 if (label != 0)
26704 {
26705 targetm.asm_out.internal_label (asm_out_file, "L",
26706 CODE_LABEL_NUMBER (label));
26707 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26708 }
26709 }
26710 }
26711
26712 #ifndef ARM_PE
26713 /* Symbols in the text segment can be accessed without indirecting via the
26714 constant pool; it may take an extra binary operation, but this is still
26715 faster than indirecting via memory. Don't do this when not optimizing,
26716 since we won't be calculating al of the offsets necessary to do this
26717 simplification. */
26718
26719 static void
26720 arm_encode_section_info (tree decl, rtx rtl, int first)
26721 {
26722 if (optimize > 0 && TREE_CONSTANT (decl))
26723 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26724
26725 default_encode_section_info (decl, rtl, first);
26726 }
26727 #endif /* !ARM_PE */
26728
26729 static void
26730 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26731 {
26732 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26733 && !strcmp (prefix, "L"))
26734 {
26735 arm_ccfsm_state = 0;
26736 arm_target_insn = NULL;
26737 }
26738 default_internal_label (stream, prefix, labelno);
26739 }
26740
26741 /* Output code to add DELTA to the first argument, and then jump
26742 to FUNCTION. Used for C++ multiple inheritance. */
26743
26744 static void
26745 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26746 HOST_WIDE_INT, tree function)
26747 {
26748 static int thunk_label = 0;
26749 char label[256];
26750 char labelpc[256];
26751 int mi_delta = delta;
26752 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26753 int shift = 0;
26754 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26755 ? 1 : 0);
26756 if (mi_delta < 0)
26757 mi_delta = - mi_delta;
26758
26759 final_start_function (emit_barrier (), file, 1);
26760
26761 if (TARGET_THUMB1)
26762 {
26763 int labelno = thunk_label++;
26764 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26765 /* Thunks are entered in ARM mode when available. */
26766 if (TARGET_THUMB1_ONLY)
26767 {
26768 /* push r3 so we can use it as a temporary. */
26769 /* TODO: Omit this save if r3 is not used. */
26770 fputs ("\tpush {r3}\n", file);
26771 fputs ("\tldr\tr3, ", file);
26772 }
26773 else
26774 {
26775 fputs ("\tldr\tr12, ", file);
26776 }
26777 assemble_name (file, label);
26778 fputc ('\n', file);
26779 if (flag_pic)
26780 {
26781 /* If we are generating PIC, the ldr instruction below loads
26782 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26783 the address of the add + 8, so we have:
26784
26785 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26786 = target + 1.
26787
26788 Note that we have "+ 1" because some versions of GNU ld
26789 don't set the low bit of the result for R_ARM_REL32
26790 relocations against thumb function symbols.
26791 On ARMv6M this is +4, not +8. */
26792 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26793 assemble_name (file, labelpc);
26794 fputs (":\n", file);
26795 if (TARGET_THUMB1_ONLY)
26796 {
26797 /* This is 2 insns after the start of the thunk, so we know it
26798 is 4-byte aligned. */
26799 fputs ("\tadd\tr3, pc, r3\n", file);
26800 fputs ("\tmov r12, r3\n", file);
26801 }
26802 else
26803 fputs ("\tadd\tr12, pc, r12\n", file);
26804 }
26805 else if (TARGET_THUMB1_ONLY)
26806 fputs ("\tmov r12, r3\n", file);
26807 }
26808 if (TARGET_THUMB1_ONLY)
26809 {
26810 if (mi_delta > 255)
26811 {
26812 fputs ("\tldr\tr3, ", file);
26813 assemble_name (file, label);
26814 fputs ("+4\n", file);
26815 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26816 mi_op, this_regno, this_regno);
26817 }
26818 else if (mi_delta != 0)
26819 {
26820 /* Thumb1 unified syntax requires the s suffix on the instruction name when
26821 one of the operands is an immediate. */
26822 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26823 mi_op, this_regno, this_regno,
26824 mi_delta);
26825 }
26826 }
26827 else
26828 {
26829 /* TODO: Use movw/movt for large constants when available. */
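/* The loop below splits the delta into 8-bit chunks that start on even
   bit positions and emits one add/sub per chunk.  Illustrative example:
   a delta of 0x1004 is emitted as "add rN, rN, #4" followed by
   "add rN, rN, #4096".  */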
26830 while (mi_delta != 0)
26831 {
26832 if ((mi_delta & (3 << shift)) == 0)
26833 shift += 2;
26834 else
26835 {
26836 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26837 mi_op, this_regno, this_regno,
26838 mi_delta & (0xff << shift));
26839 mi_delta &= ~(0xff << shift);
26840 shift += 8;
26841 }
26842 }
26843 }
26844 if (TARGET_THUMB1)
26845 {
26846 if (TARGET_THUMB1_ONLY)
26847 fputs ("\tpop\t{r3}\n", file);
26848
26849 fprintf (file, "\tbx\tr12\n");
26850 ASM_OUTPUT_ALIGN (file, 2);
26851 assemble_name (file, label);
26852 fputs (":\n", file);
26853 if (flag_pic)
26854 {
26855 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26856 rtx tem = XEXP (DECL_RTL (function), 0);
26857 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26858 pipeline offset is four rather than eight. Adjust the offset
26859 accordingly. */
26860 tem = plus_constant (GET_MODE (tem), tem,
26861 TARGET_THUMB1_ONLY ? -3 : -7);
26862 tem = gen_rtx_MINUS (GET_MODE (tem),
26863 tem,
26864 gen_rtx_SYMBOL_REF (Pmode,
26865 ggc_strdup (labelpc)));
26866 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26867 }
26868 else
26869 /* Output ".word .LTHUNKn". */
26870 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26871
26872 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26873 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26874 }
26875 else
26876 {
26877 fputs ("\tb\t", file);
26878 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26879 if (NEED_PLT_RELOC)
26880 fputs ("(PLT)", file);
26881 fputc ('\n', file);
26882 }
26883
26884 final_end_function ();
26885 }
26886
26887 /* MI thunk handling for TARGET_32BIT. */
26888
26889 static void
26890 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26891 HOST_WIDE_INT vcall_offset, tree function)
26892 {
26893 const bool long_call_p = arm_is_long_call_p (function);
26894
26895 /* On ARM, this_regno is R0 or R1 depending on
26896 whether the function returns an aggregate or not.
26897 */
26898 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26899 function)
26900 ? R1_REGNUM : R0_REGNUM);
26901
26902 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26903 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26904 reload_completed = 1;
26905 emit_note (NOTE_INSN_PROLOGUE_END);
26906
26907 /* Add DELTA to THIS_RTX. */
26908 if (delta != 0)
26909 arm_split_constant (PLUS, Pmode, NULL_RTX,
26910 delta, this_rtx, this_rtx, false);
26911
26912 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26913 if (vcall_offset != 0)
26914 {
26915 /* Load *THIS_RTX. */
26916 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26917 /* Compute *THIS_RTX + VCALL_OFFSET. */
26918 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26919 false);
26920 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26921 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26922 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26923 }
26924
26925 /* Generate a tail call to the target function. */
26926 if (!TREE_USED (function))
26927 {
26928 assemble_external (function);
26929 TREE_USED (function) = 1;
26930 }
26931 rtx funexp = XEXP (DECL_RTL (function), 0);
26932 if (long_call_p)
26933 {
26934 emit_move_insn (temp, funexp);
26935 funexp = temp;
26936 }
26937 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26938 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26939 SIBLING_CALL_P (insn) = 1;
26940 emit_barrier ();
26941
26942 /* Indirect calls require a bit of fixup in PIC mode. */
26943 if (long_call_p)
26944 {
26945 split_all_insns_noflow ();
26946 arm_reorg ();
26947 }
26948
26949 insn = get_insns ();
26950 shorten_branches (insn);
26951 final_start_function (insn, file, 1);
26952 final (insn, file, 1);
26953 final_end_function ();
26954
26955 /* Stop pretending this is a post-reload pass. */
26956 reload_completed = 0;
26957 }
26958
26959 /* Output code to add DELTA to the first argument, and then jump
26960 to FUNCTION. Used for C++ multiple inheritance. */
26961
26962 static void
26963 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26964 HOST_WIDE_INT vcall_offset, tree function)
26965 {
26966 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
26967
26968 assemble_start_function (thunk, fnname);
26969 if (TARGET_32BIT)
26970 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26971 else
26972 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26973 assemble_end_function (thunk, fnname);
26974 }
26975
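/* Write the elements of the CONST_VECTOR X to FILE as one hexadecimal
   literal, highest-numbered element first, using a field width that
   matches the element size.  Always returns 1.  */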
26976 int
26977 arm_emit_vector_const (FILE *file, rtx x)
26978 {
26979 int i;
26980 const char * pattern;
26981
26982 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26983
26984 switch (GET_MODE (x))
26985 {
26986 case E_V2SImode: pattern = "%08x"; break;
26987 case E_V4HImode: pattern = "%04x"; break;
26988 case E_V8QImode: pattern = "%02x"; break;
26989 default: gcc_unreachable ();
26990 }
26991
26992 fprintf (file, "0x");
26993 for (i = CONST_VECTOR_NUNITS (x); i--;)
26994 {
26995 rtx element;
26996
26997 element = CONST_VECTOR_ELT (x, i);
26998 fprintf (file, pattern, INTVAL (element));
26999 }
27000
27001 return 1;
27002 }
27003
27004 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27005 HFmode constant pool entries are actually loaded with ldr. */
27006 void
27007 arm_emit_fp16_const (rtx c)
27008 {
27009 long bits;
27010
27011 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
27012 if (WORDS_BIG_ENDIAN)
27013 assemble_zeros (2);
27014 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27015 if (!WORDS_BIG_ENDIAN)
27016 assemble_zeros (2);
27017 }
27018
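/* Output the assembly for loading an iWMMXt GR register (operand 0) from
   memory (operand 1).  A plain wldrw is used unless the address is a base
   register plus an out-of-range offset, in which case the base register
   is saved on the stack, used as a temporary for an ldr, the value is
   transferred with tmcr, and the base register is then restored.  */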
27019 const char *
27020 arm_output_load_gr (rtx *operands)
27021 {
27022 rtx reg;
27023 rtx offset;
27024 rtx wcgr;
27025 rtx sum;
27026
27027 if (!MEM_P (operands [1])
27028 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27029 || !REG_P (reg = XEXP (sum, 0))
27030 || !CONST_INT_P (offset = XEXP (sum, 1))
27031 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27032 return "wldrw%?\t%0, %1";
27033
27034 /* Fix up an out-of-range load of a GR register. */
27035 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27036 wcgr = operands[0];
27037 operands[0] = reg;
27038 output_asm_insn ("ldr%?\t%0, %1", operands);
27039
27040 operands[0] = wcgr;
27041 operands[1] = reg;
27042 output_asm_insn ("tmcr%?\t%0, %1", operands);
27043 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27044
27045 return "";
27046 }
27047
27048 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27049
27050 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27051 named arg and all anonymous args onto the stack.
27052 XXX I know the prologue shouldn't be pushing registers, but it is faster
27053 that way. */
27054
27055 static void
27056 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27057 machine_mode mode,
27058 tree type,
27059 int *pretend_size,
27060 int second_time ATTRIBUTE_UNUSED)
27061 {
27062 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27063 int nregs;
27064
27065 cfun->machine->uses_anonymous_args = 1;
27066 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27067 {
27068 nregs = pcum->aapcs_ncrn;
27069 if (nregs & 1)
27070 {
27071 int res = arm_needs_doubleword_align (mode, type);
27072 if (res < 0 && warn_psabi)
27073 inform (input_location, "parameter passing for argument of "
27074 "type %qT changed in GCC 7.1", type);
27075 else if (res > 0)
27076 {
27077 nregs++;
27078 if (res > 1 && warn_psabi)
27079 inform (input_location,
27080 "parameter passing for argument of type "
27081 "%qT changed in GCC 9.1", type);
27082 }
27083 }
27084 }
27085 else
27086 nregs = pcum->nregs;
27087
27088 if (nregs < NUM_ARG_REGS)
27089 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27090 }
27091
27092 /* We can't rely on the caller doing the proper promotion when
27093 using APCS or ATPCS. */
27094
27095 static bool
27096 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27097 {
27098 return !TARGET_AAPCS_BASED;
27099 }
27100
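/* Worker for the promote_function_mode target hook: integer arguments and
   return values narrower than a word are promoted to SImode.  */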
27101 static machine_mode
27102 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27103 machine_mode mode,
27104 int *punsignedp ATTRIBUTE_UNUSED,
27105 const_tree fntype ATTRIBUTE_UNUSED,
27106 int for_return ATTRIBUTE_UNUSED)
27107 {
27108 if (GET_MODE_CLASS (mode) == MODE_INT
27109 && GET_MODE_SIZE (mode) < 4)
27110 return SImode;
27111
27112 return mode;
27113 }
27114
27115
27116 static bool
27117 arm_default_short_enums (void)
27118 {
27119 return ARM_DEFAULT_SHORT_ENUMS;
27120 }
27121
27122
27123 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27124
27125 static bool
27126 arm_align_anon_bitfield (void)
27127 {
27128 return TARGET_AAPCS_BASED;
27129 }
27130
27131
27132 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27133
27134 static tree
27135 arm_cxx_guard_type (void)
27136 {
27137 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27138 }
27139
27140
27141 /* The EABI says test the least significant bit of a guard variable. */
27142
27143 static bool
27144 arm_cxx_guard_mask_bit (void)
27145 {
27146 return TARGET_AAPCS_BASED;
27147 }
27148
27149
27150 /* The EABI specifies that all array cookies are 8 bytes long. */
27151
27152 static tree
27153 arm_get_cookie_size (tree type)
27154 {
27155 tree size;
27156
27157 if (!TARGET_AAPCS_BASED)
27158 return default_cxx_get_cookie_size (type);
27159
27160 size = build_int_cst (sizetype, 8);
27161 return size;
27162 }
27163
27164
27165 /* The EABI says that array cookies should also contain the element size. */
27166
27167 static bool
27168 arm_cookie_has_size (void)
27169 {
27170 return TARGET_AAPCS_BASED;
27171 }
27172
27173
27174 /* The EABI says constructors and destructors should return a pointer to
27175 the object constructed/destroyed. */
27176
27177 static bool
27178 arm_cxx_cdtor_returns_this (void)
27179 {
27180 return TARGET_AAPCS_BASED;
27181 }
27182
27183 /* The EABI says that an inline function may never be the key
27184 method. */
27185
27186 static bool
27187 arm_cxx_key_method_may_be_inline (void)
27188 {
27189 return !TARGET_AAPCS_BASED;
27190 }
27191
27192 static void
27193 arm_cxx_determine_class_data_visibility (tree decl)
27194 {
27195 if (!TARGET_AAPCS_BASED
27196 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27197 return;
27198
27199 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27200 is exported. However, on systems without dynamic vague linkage,
27201 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27202 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27203 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27204 else
27205 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27206 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27207 }
27208
27209 static bool
27210 arm_cxx_class_data_always_comdat (void)
27211 {
27212 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27213 vague linkage if the class has no key function. */
27214 return !TARGET_AAPCS_BASED;
27215 }
27216
27217
27218 /* The EABI says __aeabi_atexit should be used to register static
27219 destructors. */
27220
27221 static bool
27222 arm_cxx_use_aeabi_atexit (void)
27223 {
27224 return TARGET_AAPCS_BASED;
27225 }
27226
27227
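/* Store SOURCE as the return address of the current function.  If LR was
   not saved on the stack, simply move SOURCE into LR; otherwise write it
   to LR's save slot, using SCRATCH when the slot is too far from the
   stack pointer for a single immediate offset.  */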
27228 void
27229 arm_set_return_address (rtx source, rtx scratch)
27230 {
27231 arm_stack_offsets *offsets;
27232 HOST_WIDE_INT delta;
27233 rtx addr, mem;
27234 unsigned long saved_regs;
27235
27236 offsets = arm_get_frame_offsets ();
27237 saved_regs = offsets->saved_regs_mask;
27238
27239 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27240 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27241 else
27242 {
27243 if (frame_pointer_needed)
27244 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27245 else
27246 {
27247 /* LR will be the first saved register. */
27248 delta = offsets->outgoing_args - (offsets->frame + 4);
27249
27250
27251 if (delta >= 4096)
27252 {
27253 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27254 GEN_INT (delta & ~4095)));
27255 addr = scratch;
27256 delta &= 4095;
27257 }
27258 else
27259 addr = stack_pointer_rtx;
27260
27261 addr = plus_constant (Pmode, addr, delta);
27262 }
27263
27264 /* The store needs to be marked to prevent DSE from deleting
27265 it as dead if it is based on fp. */
27266 mem = gen_frame_mem (Pmode, addr);
27267 MEM_VOLATILE_P (mem) = true;
27268 emit_move_insn (mem, source);
27269 }
27270 }
27271
27272
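/* Thumb counterpart of arm_set_return_address.  The immediate offset
   limits are tighter here, so SCRATCH is used whenever the save slot is
   more than LIMIT bytes away from the base register.  */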
27273 void
27274 thumb_set_return_address (rtx source, rtx scratch)
27275 {
27276 arm_stack_offsets *offsets;
27277 HOST_WIDE_INT delta;
27278 HOST_WIDE_INT limit;
27279 int reg;
27280 rtx addr, mem;
27281 unsigned long mask;
27282
27283 emit_use (source);
27284
27285 offsets = arm_get_frame_offsets ();
27286 mask = offsets->saved_regs_mask;
27287 if (mask & (1 << LR_REGNUM))
27288 {
27289 limit = 1024;
27290 /* Find the saved regs. */
27291 if (frame_pointer_needed)
27292 {
27293 delta = offsets->soft_frame - offsets->saved_args;
27294 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27295 if (TARGET_THUMB1)
27296 limit = 128;
27297 }
27298 else
27299 {
27300 delta = offsets->outgoing_args - offsets->saved_args;
27301 reg = SP_REGNUM;
27302 }
27303 /* Allow for the stack frame. */
27304 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27305 delta -= 16;
27306 /* The link register is always the first saved register. */
27307 delta -= 4;
27308
27309 /* Construct the address. */
27310 addr = gen_rtx_REG (SImode, reg);
27311 if (delta > limit)
27312 {
27313 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27314 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27315 addr = scratch;
27316 }
27317 else
27318 addr = plus_constant (Pmode, addr, delta);
27319
27320 /* The store needs to be marked to prevent DSE from deleting
27321 it as dead if it is based on fp. */
27322 mem = gen_frame_mem (Pmode, addr);
27323 MEM_VOLATILE_P (mem) = true;
27324 emit_move_insn (mem, source);
27325 }
27326 else
27327 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27328 }
27329
27330 /* Implements target hook vector_mode_supported_p. */
27331 bool
27332 arm_vector_mode_supported_p (machine_mode mode)
27333 {
27334 /* Neon also supports V2SImode, etc. listed in the clause below. */
27335 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27336 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27337 || mode == V2DImode || mode == V8HFmode))
27338 return true;
27339
27340 if ((TARGET_NEON || TARGET_IWMMXT)
27341 && ((mode == V2SImode)
27342 || (mode == V4HImode)
27343 || (mode == V8QImode)))
27344 return true;
27345
27346 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27347 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27348 || mode == V2HAmode))
27349 return true;
27350
27351 return false;
27352 }
27353
27354 /* Implements target hook array_mode_supported_p. */
27355
27356 static bool
27357 arm_array_mode_supported_p (machine_mode mode,
27358 unsigned HOST_WIDE_INT nelems)
27359 {
27360 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27361 for now, as the lane-swapping logic needs to be extended in the expanders.
27362 See PR target/82518. */
27363 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27364 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27365 && (nelems >= 2 && nelems <= 4))
27366 return true;
27367
27368 return false;
27369 }
27370
27371 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27372 registers when autovectorizing for Neon, at least until multiple vector
27373 widths are supported properly by the middle-end. */
27374
27375 static machine_mode
27376 arm_preferred_simd_mode (scalar_mode mode)
27377 {
27378 if (TARGET_NEON)
27379 switch (mode)
27380 {
27381 case E_SFmode:
27382 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27383 case E_SImode:
27384 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27385 case E_HImode:
27386 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27387 case E_QImode:
27388 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27389 case E_DImode:
27390 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27391 return V2DImode;
27392 break;
27393
27394 default:;
27395 }
27396
27397 if (TARGET_REALLY_IWMMXT)
27398 switch (mode)
27399 {
27400 case E_SImode:
27401 return V2SImode;
27402 case E_HImode:
27403 return V4HImode;
27404 case E_QImode:
27405 return V8QImode;
27406
27407 default:;
27408 }
27409
27410 return word_mode;
27411 }
27412
27413 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27414
27415 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27416 using r0-r4 for function arguments, r7 for the stack frame and don't have
27417 enough left over to do doubleword arithmetic. For Thumb-2 all the
27418 potentially problematic instructions accept high registers so this is not
27419 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27420 that require many low registers. */
27421 static bool
27422 arm_class_likely_spilled_p (reg_class_t rclass)
27423 {
27424 if ((TARGET_THUMB1 && rclass == LO_REGS)
27425 || rclass == CC_REG)
27426 return true;
27427
27428 return false;
27429 }
27430
27431 /* Implements target hook small_register_classes_for_mode_p. */
27432 bool
27433 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27434 {
27435 return TARGET_THUMB1;
27436 }
27437
27438 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27439 ARM insns and therefore guarantee that the shift count is modulo 256.
27440 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27441 guarantee no particular behavior for out-of-range counts. */
27442
27443 static unsigned HOST_WIDE_INT
27444 arm_shift_truncation_mask (machine_mode mode)
27445 {
27446 return mode == SImode ? 255 : 0;
27447 }
27448
27449
27450 /* Map internal gcc register numbers to DWARF2 register numbers. */
27451
27452 unsigned int
27453 arm_dbx_register_number (unsigned int regno)
27454 {
27455 if (regno < 16)
27456 return regno;
27457
27458 if (IS_VFP_REGNUM (regno))
27459 {
27460 /* See comment in arm_dwarf_register_span. */
27461 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27462 return 64 + regno - FIRST_VFP_REGNUM;
27463 else
27464 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27465 }
27466
27467 if (IS_IWMMXT_GR_REGNUM (regno))
27468 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27469
27470 if (IS_IWMMXT_REGNUM (regno))
27471 return 112 + regno - FIRST_IWMMXT_REGNUM;
27472
27473 return DWARF_FRAME_REGISTERS;
27474 }
27475
27476 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27477 GCC models them as 64 32-bit registers, so we need to describe this to
27478 the DWARF generation code. Other registers can use the default. */
27479 static rtx
27480 arm_dwarf_register_span (rtx rtl)
27481 {
27482 machine_mode mode;
27483 unsigned regno;
27484 rtx parts[16];
27485 int nregs;
27486 int i;
27487
27488 regno = REGNO (rtl);
27489 if (!IS_VFP_REGNUM (regno))
27490 return NULL_RTX;
27491
27492 /* XXX FIXME: The EABI defines two VFP register ranges:
27493 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27494 256-287: D0-D31
27495 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27496 corresponding D register. Until GDB supports this, we shall use the
27497 legacy encodings. We also use these encodings for D0-D15 for
27498 compatibility with older debuggers. */
27499 mode = GET_MODE (rtl);
27500 if (GET_MODE_SIZE (mode) < 8)
27501 return NULL_RTX;
27502
27503 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27504 {
27505 nregs = GET_MODE_SIZE (mode) / 4;
27506 for (i = 0; i < nregs; i += 2)
27507 if (TARGET_BIG_END)
27508 {
27509 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27510 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27511 }
27512 else
27513 {
27514 parts[i] = gen_rtx_REG (SImode, regno + i);
27515 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27516 }
27517 }
27518 else
27519 {
27520 nregs = GET_MODE_SIZE (mode) / 8;
27521 for (i = 0; i < nregs; i++)
27522 parts[i] = gen_rtx_REG (DImode, regno + i);
27523 }
27524
27525 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27526 }
27527
27528 #if ARM_UNWIND_INFO
27529 /* Emit unwind directives for a store-multiple instruction or stack pointer
27530 push during alignment.
27531 These should only ever be generated by the function prologue code, so
27532 expect them to have a particular form.
27533 The store-multiple instruction sometimes pushes pc as the last register,
27534 although it should not be tracked into unwind information, or for -Os
27535 sometimes pushes some dummy registers before the first register that needs
27536 to be tracked in unwind information; such dummy registers are there just
27537 to avoid a separate stack adjustment, and will not be restored in the
27538 epilogue. */
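/* Illustrative example: a prologue "push {r4, r5, lr}" is annotated here
   as "\t.save {r4, r5, lr}".  */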
27539
27540 static void
27541 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27542 {
27543 int i;
27544 HOST_WIDE_INT offset;
27545 HOST_WIDE_INT nregs;
27546 int reg_size;
27547 unsigned reg;
27548 unsigned lastreg;
27549 unsigned padfirst = 0, padlast = 0;
27550 rtx e;
27551
27552 e = XVECEXP (p, 0, 0);
27553 gcc_assert (GET_CODE (e) == SET);
27554
27555 /* First insn will adjust the stack pointer. */
27556 gcc_assert (GET_CODE (e) == SET
27557 && REG_P (SET_DEST (e))
27558 && REGNO (SET_DEST (e)) == SP_REGNUM
27559 && GET_CODE (SET_SRC (e)) == PLUS);
27560
27561 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27562 nregs = XVECLEN (p, 0) - 1;
27563 gcc_assert (nregs);
27564
27565 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27566 if (reg < 16)
27567 {
27568 /* For -Os dummy registers can be pushed at the beginning to
27569 avoid separate stack pointer adjustment. */
27570 e = XVECEXP (p, 0, 1);
27571 e = XEXP (SET_DEST (e), 0);
27572 if (GET_CODE (e) == PLUS)
27573 padfirst = INTVAL (XEXP (e, 1));
27574 gcc_assert (padfirst == 0 || optimize_size);
27575 /* The function prologue may also push pc, but not annotate it as it is
27576 never restored. We turn this into a stack pointer adjustment. */
27577 e = XVECEXP (p, 0, nregs);
27578 e = XEXP (SET_DEST (e), 0);
27579 if (GET_CODE (e) == PLUS)
27580 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27581 else
27582 padlast = offset - 4;
27583 gcc_assert (padlast == 0 || padlast == 4);
27584 if (padlast == 4)
27585 fprintf (asm_out_file, "\t.pad #4\n");
27586 reg_size = 4;
27587 fprintf (asm_out_file, "\t.save {");
27588 }
27589 else if (IS_VFP_REGNUM (reg))
27590 {
27591 reg_size = 8;
27592 fprintf (asm_out_file, "\t.vsave {");
27593 }
27594 else
27595 /* Unknown register type. */
27596 gcc_unreachable ();
27597
27598 /* If the stack increment doesn't match the size of the saved registers,
27599 something has gone horribly wrong. */
27600 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27601
27602 offset = padfirst;
27603 lastreg = 0;
27604 /* The remaining insns will describe the stores. */
27605 for (i = 1; i <= nregs; i++)
27606 {
27607 /* Expect (set (mem <addr>) (reg)).
27608 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27609 e = XVECEXP (p, 0, i);
27610 gcc_assert (GET_CODE (e) == SET
27611 && MEM_P (SET_DEST (e))
27612 && REG_P (SET_SRC (e)));
27613
27614 reg = REGNO (SET_SRC (e));
27615 gcc_assert (reg >= lastreg);
27616
27617 if (i != 1)
27618 fprintf (asm_out_file, ", ");
27619 /* We can't use %r for vfp because we need to use the
27620 double precision register names. */
27621 if (IS_VFP_REGNUM (reg))
27622 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27623 else
27624 asm_fprintf (asm_out_file, "%r", reg);
27625
27626 if (flag_checking)
27627 {
27628 /* Check that the addresses are consecutive. */
27629 e = XEXP (SET_DEST (e), 0);
27630 if (GET_CODE (e) == PLUS)
27631 gcc_assert (REG_P (XEXP (e, 0))
27632 && REGNO (XEXP (e, 0)) == SP_REGNUM
27633 && CONST_INT_P (XEXP (e, 1))
27634 && offset == INTVAL (XEXP (e, 1)));
27635 else
27636 gcc_assert (i == 1
27637 && REG_P (e)
27638 && REGNO (e) == SP_REGNUM);
27639 offset += reg_size;
27640 }
27641 }
27642 fprintf (asm_out_file, "}\n");
27643 if (padfirst)
27644 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27645 }
27646
27647 /* Emit unwind directives for a SET. */
27648
27649 static void
27650 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27651 {
27652 rtx e0;
27653 rtx e1;
27654 unsigned reg;
27655
27656 e0 = XEXP (p, 0);
27657 e1 = XEXP (p, 1);
27658 switch (GET_CODE (e0))
27659 {
27660 case MEM:
27661 /* Pushing a single register. */
27662 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27663 || !REG_P (XEXP (XEXP (e0, 0), 0))
27664 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27665 abort ();
27666
27667 asm_fprintf (asm_out_file, "\t.save ");
27668 if (IS_VFP_REGNUM (REGNO (e1)))
27669 asm_fprintf(asm_out_file, "{d%d}\n",
27670 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27671 else
27672 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27673 break;
27674
27675 case REG:
27676 if (REGNO (e0) == SP_REGNUM)
27677 {
27678 /* A stack increment. */
27679 if (GET_CODE (e1) != PLUS
27680 || !REG_P (XEXP (e1, 0))
27681 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27682 || !CONST_INT_P (XEXP (e1, 1)))
27683 abort ();
27684
27685 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27686 -INTVAL (XEXP (e1, 1)));
27687 }
27688 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27689 {
27690 HOST_WIDE_INT offset;
27691
27692 if (GET_CODE (e1) == PLUS)
27693 {
27694 if (!REG_P (XEXP (e1, 0))
27695 || !CONST_INT_P (XEXP (e1, 1)))
27696 abort ();
27697 reg = REGNO (XEXP (e1, 0));
27698 offset = INTVAL (XEXP (e1, 1));
27699 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27700 HARD_FRAME_POINTER_REGNUM, reg,
27701 offset);
27702 }
27703 else if (REG_P (e1))
27704 {
27705 reg = REGNO (e1);
27706 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27707 HARD_FRAME_POINTER_REGNUM, reg);
27708 }
27709 else
27710 abort ();
27711 }
27712 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27713 {
27714 /* Move from sp to reg. */
27715 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27716 }
27717 else if (GET_CODE (e1) == PLUS
27718 && REG_P (XEXP (e1, 0))
27719 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27720 && CONST_INT_P (XEXP (e1, 1)))
27721 {
27722 /* Set reg to offset from sp. */
27723 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27724 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27725 }
27726 else
27727 abort ();
27728 break;
27729
27730 default:
27731 abort ();
27732 }
27733 }
27734
27735
27736 /* Emit unwind directives for the given insn. */
27737
27738 static void
27739 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27740 {
27741 rtx note, pat;
27742 bool handled_one = false;
27743
27744 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27745 return;
27746
27747 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27748 && (TREE_NOTHROW (current_function_decl)
27749 || crtl->all_throwers_are_sibcalls))
27750 return;
27751
27752 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27753 return;
27754
27755 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27756 {
27757 switch (REG_NOTE_KIND (note))
27758 {
27759 case REG_FRAME_RELATED_EXPR:
27760 pat = XEXP (note, 0);
27761 goto found;
27762
27763 case REG_CFA_REGISTER:
27764 pat = XEXP (note, 0);
27765 if (pat == NULL)
27766 {
27767 pat = PATTERN (insn);
27768 if (GET_CODE (pat) == PARALLEL)
27769 pat = XVECEXP (pat, 0, 0);
27770 }
27771
27772 /* Only emitted for IS_STACKALIGN re-alignment. */
27773 {
27774 rtx dest, src;
27775 unsigned reg;
27776
27777 src = SET_SRC (pat);
27778 dest = SET_DEST (pat);
27779
27780 gcc_assert (src == stack_pointer_rtx);
27781 reg = REGNO (dest);
27782 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27783 reg + 0x90, reg);
27784 }
27785 handled_one = true;
27786 break;
27787
27788 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27789 to get correct dwarf information for shrink-wrapping.  We should not
27790 emit unwind information for it because such notes are used either for
27791 pretend arguments or to adjust sp and restore registers from the
27792 stack. */
27793 case REG_CFA_DEF_CFA:
27794 case REG_CFA_ADJUST_CFA:
27795 case REG_CFA_RESTORE:
27796 return;
27797
27798 case REG_CFA_EXPRESSION:
27799 case REG_CFA_OFFSET:
27800 /* ??? Only handling here what we actually emit. */
27801 gcc_unreachable ();
27802
27803 default:
27804 break;
27805 }
27806 }
27807 if (handled_one)
27808 return;
27809 pat = PATTERN (insn);
27810 found:
27811
27812 switch (GET_CODE (pat))
27813 {
27814 case SET:
27815 arm_unwind_emit_set (asm_out_file, pat);
27816 break;
27817
27818 case SEQUENCE:
27819 /* Store multiple. */
27820 arm_unwind_emit_sequence (asm_out_file, pat);
27821 break;
27822
27823 default:
27824 abort();
27825 }
27826 }
27827
27828
27829 /* Output a reference from a function exception table to the type_info
27830 object X. The EABI specifies that the symbol should be relocated by
27831 an R_ARM_TARGET2 relocation. */
27832
27833 static bool
27834 arm_output_ttype (rtx x)
27835 {
27836 fputs ("\t.word\t", asm_out_file);
27837 output_addr_const (asm_out_file, x);
27838 /* Use special relocations for symbol references. */
27839 if (!CONST_INT_P (x))
27840 fputs ("(TARGET2)", asm_out_file);
27841 fputc ('\n', asm_out_file);
27842
27843 return TRUE;
27844 }
27845
27846 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27847
27848 static void
27849 arm_asm_emit_except_personality (rtx personality)
27850 {
27851 fputs ("\t.personality\t", asm_out_file);
27852 output_addr_const (asm_out_file, personality);
27853 fputc ('\n', asm_out_file);
27854 }
27855 #endif /* ARM_UNWIND_INFO */
27856
27857 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27858
27859 static void
27860 arm_asm_init_sections (void)
27861 {
27862 #if ARM_UNWIND_INFO
27863 exception_section = get_unnamed_section (0, output_section_asm_op,
27864 "\t.handlerdata");
27865 #endif /* ARM_UNWIND_INFO */
27866
27867 #ifdef OBJECT_FORMAT_ELF
27868 if (target_pure_code)
27869 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27870 #endif
27871 }
27872
27873 /* Output unwind directives for the start/end of a function. */
27874
27875 void
27876 arm_output_fn_unwind (FILE * f, bool prologue)
27877 {
27878 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27879 return;
27880
27881 if (prologue)
27882 fputs ("\t.fnstart\n", f);
27883 else
27884 {
27885 /* If this function will never be unwound, then mark it as such.
27886 The same condition is used in arm_unwind_emit to suppress
27887 the frame annotations. */
27888 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27889 && (TREE_NOTHROW (current_function_decl)
27890 || crtl->all_throwers_are_sibcalls))
27891 fputs("\t.cantunwind\n", f);
27892
27893 fputs ("\t.fnend\n", f);
27894 }
27895 }
27896
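/* Output the operand of an UNSPEC_TLS rtx: the symbol is printed followed
   by the relocation suffix selected by its TLS model (e.g. "(tlsgd)" or
   "(tpoff)"), plus a PC-relative adjustment for the sequence-relative
   relocations.  Always returns TRUE.  */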
27897 static bool
27898 arm_emit_tls_decoration (FILE *fp, rtx x)
27899 {
27900 enum tls_reloc reloc;
27901 rtx val;
27902
27903 val = XVECEXP (x, 0, 0);
27904 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27905
27906 output_addr_const (fp, val);
27907
27908 switch (reloc)
27909 {
27910 case TLS_GD32:
27911 fputs ("(tlsgd)", fp);
27912 break;
27913 case TLS_LDM32:
27914 fputs ("(tlsldm)", fp);
27915 break;
27916 case TLS_LDO32:
27917 fputs ("(tlsldo)", fp);
27918 break;
27919 case TLS_IE32:
27920 fputs ("(gottpoff)", fp);
27921 break;
27922 case TLS_LE32:
27923 fputs ("(tpoff)", fp);
27924 break;
27925 case TLS_DESCSEQ:
27926 fputs ("(tlsdesc)", fp);
27927 break;
27928 default:
27929 gcc_unreachable ();
27930 }
27931
27932 switch (reloc)
27933 {
27934 case TLS_GD32:
27935 case TLS_LDM32:
27936 case TLS_IE32:
27937 case TLS_DESCSEQ:
27938 fputs (" + (. - ", fp);
27939 output_addr_const (fp, XVECEXP (x, 0, 2));
27940 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather
than subtracted. */
27941 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27942 output_addr_const (fp, XVECEXP (x, 0, 3));
27943 fputc (')', fp);
27944 break;
27945 default:
27946 break;
27947 }
27948
27949 return TRUE;
27950 }
27951
27952 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27953
27954 static void
27955 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27956 {
27957 gcc_assert (size == 4);
27958 fputs ("\t.word\t", file);
27959 output_addr_const (file, x);
27960 fputs ("(tlsldo)", file);
27961 }
27962
27963 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27964
27965 static bool
27966 arm_output_addr_const_extra (FILE *fp, rtx x)
27967 {
27968 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27969 return arm_emit_tls_decoration (fp, x);
27970 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27971 {
27972 char label[256];
27973 int labelno = INTVAL (XVECEXP (x, 0, 0));
27974
27975 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27976 assemble_name_raw (fp, label);
27977
27978 return TRUE;
27979 }
27980 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27981 {
27982 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27983 if (GOT_PCREL)
27984 fputs ("+.", fp);
27985 fputs ("-(", fp);
27986 output_addr_const (fp, XVECEXP (x, 0, 0));
27987 fputc (')', fp);
27988 return TRUE;
27989 }
27990 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27991 {
27992 output_addr_const (fp, XVECEXP (x, 0, 0));
27993 if (GOT_PCREL)
27994 fputs ("+.", fp);
27995 fputs ("-(", fp);
27996 output_addr_const (fp, XVECEXP (x, 0, 1));
27997 fputc (')', fp);
27998 return TRUE;
27999 }
28000 else if (GET_CODE (x) == CONST_VECTOR)
28001 return arm_emit_vector_const (fp, x);
28002
28003 return FALSE;
28004 }
28005
28006 /* Output assembly for a shift instruction.
28007 SET_FLAGS determines how the instruction modifies the condition codes.
28008 0 - Do not set condition codes.
28009 1 - Set condition codes.
28010 2 - Use smallest instruction. */
28011 const char *
28012 arm_output_shift(rtx * operands, int set_flags)
28013 {
28014 char pattern[100];
28015 static const char flag_chars[3] = {'?', '.', '!'};
28016 const char *shift;
28017 HOST_WIDE_INT val;
28018 char c;
28019
28020 c = flag_chars[set_flags];
28021 shift = shift_op(operands[3], &val);
28022 if (shift)
28023 {
28024 if (val != -1)
28025 operands[2] = GEN_INT(val);
28026 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28027 }
28028 else
28029 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28030
28031 output_asm_insn (pattern, operands);
28032 return "";
28033 }
28034
28035 /* Output assembly for a WMMX immediate shift instruction. */
28036 const char *
28037 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28038 {
28039 int shift = INTVAL (operands[2]);
28040 char templ[50];
28041 machine_mode opmode = GET_MODE (operands[0]);
28042
28043 gcc_assert (shift >= 0);
28044
28045 /* Handle shift values that would be out of range for the register
28046 versions: > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
28047 if (((opmode == V4HImode) && (shift > 15))
28048 || ((opmode == V2SImode) && (shift > 31))
28049 || ((opmode == DImode) && (shift > 63)))
28050 {
28051 if (wror_or_wsra)
28052 {
28053 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28054 output_asm_insn (templ, operands);
28055 if (opmode == DImode)
28056 {
28057 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28058 output_asm_insn (templ, operands);
28059 }
28060 }
28061 else
28062 {
28063 /* The destination register will contain all zeros. */
28064 sprintf (templ, "wzero\t%%0");
28065 output_asm_insn (templ, operands);
28066 }
28067 return "";
28068 }
28069
28070 if ((opmode == DImode) && (shift > 32))
28071 {
28072 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28073 output_asm_insn (templ, operands);
28074 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28075 output_asm_insn (templ, operands);
28076 }
28077 else
28078 {
28079 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28080 output_asm_insn (templ, operands);
28081 }
28082 return "";
28083 }
28084
28085 /* Output assembly for a WMMX tinsr instruction. */
28086 const char *
28087 arm_output_iwmmxt_tinsr (rtx *operands)
28088 {
28089 int mask = INTVAL (operands[3]);
28090 int i;
28091 char templ[50];
28092 int units = mode_nunits[GET_MODE (operands[0])];
28093 gcc_assert ((mask & (mask - 1)) == 0);
28094 for (i = 0; i < units; ++i)
28095 {
28096 if ((mask & 0x01) == 1)
28097 {
28098 break;
28099 }
28100 mask >>= 1;
28101 }
28102 gcc_assert (i < units);
28103 {
28104 switch (GET_MODE (operands[0]))
28105 {
28106 case E_V8QImode:
28107 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28108 break;
28109 case E_V4HImode:
28110 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28111 break;
28112 case E_V2SImode:
28113 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28114 break;
28115 default:
28116 gcc_unreachable ();
28117 break;
28118 }
28119 output_asm_insn (templ, operands);
28120 }
28121 return "";
28122 }
28123
28124 /* Output a Thumb-1 casesi dispatch sequence. */
28125 const char *
28126 thumb1_output_casesi (rtx *operands)
28127 {
28128 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28129
28130 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28131
28132 switch (GET_MODE(diff_vec))
28133 {
28134 case E_QImode:
28135 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28136 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28137 case E_HImode:
28138 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28139 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28140 case E_SImode:
28141 return "bl\t%___gnu_thumb1_case_si";
28142 default:
28143 gcc_unreachable ();
28144 }
28145 }
28146
28147 /* Output a Thumb-2 casesi instruction. */
28148 const char *
28149 thumb2_output_casesi (rtx *operands)
28150 {
28151 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28152
28153 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28154
28155 output_asm_insn ("cmp\t%0, %1", operands);
28156 output_asm_insn ("bhi\t%l3", operands);
28157 switch (GET_MODE(diff_vec))
28158 {
28159 case E_QImode:
28160 return "tbb\t[%|pc, %0]";
28161 case E_HImode:
28162 return "tbh\t[%|pc, %0, lsl #1]";
28163 case E_SImode:
28164 if (flag_pic)
28165 {
28166 output_asm_insn ("adr\t%4, %l2", operands);
28167 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28168 output_asm_insn ("add\t%4, %4, %5", operands);
28169 return "bx\t%4";
28170 }
28171 else
28172 {
28173 output_asm_insn ("adr\t%4, %l2", operands);
28174 return "ldr\t%|pc, [%4, %0, lsl #2]";
28175 }
28176 default:
28177 gcc_unreachable ();
28178 }
28179 }
28180
28181 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28182 per-core tuning structs. */
28183 static int
28184 arm_issue_rate (void)
28185 {
28186 return current_tune->issue_rate;
28187 }
28188
28189 /* Return how many instructions the scheduler should look ahead to choose
28190 the best one. */
28191 static int
28192 arm_first_cycle_multipass_dfa_lookahead (void)
28193 {
28194 int issue_rate = arm_issue_rate ();
28195
28196 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28197 }
28198
28199 /* Enable modeling of L2 auto-prefetcher. */
28200 static int
28201 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28202 {
28203 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28204 }
28205
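/* Implement TARGET_MANGLE_TYPE: handle the ARM-specific C++ manglings of
   __va_list, half-precision float and the Neon builtin types.  */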
28206 const char *
28207 arm_mangle_type (const_tree type)
28208 {
28209 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28210 has to be mangled as if it is in the "std" namespace. */
28211 if (TARGET_AAPCS_BASED
28212 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28213 return "St9__va_list";
28214
28215 /* Half-precision float. */
28216 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28217 return "Dh";
28218
28219 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28220 builtin type. */
28221 if (TYPE_NAME (type) != NULL)
28222 return arm_mangle_builtin_type (type);
28223
28224 /* Use the default mangling. */
28225 return NULL;
28226 }
28227
28228 /* Order of allocation of core registers for Thumb: this allocation is
28229 written over the corresponding initial entries of the array
28230 initialized with REG_ALLOC_ORDER. We allocate all low registers
28231 first. Saving and restoring a low register is usually cheaper than
28232 using a call-clobbered high register. */
28233
28234 static const int thumb_core_reg_alloc_order[] =
28235 {
28236 3, 2, 1, 0, 4, 5, 6, 7,
28237 12, 14, 8, 9, 10, 11
28238 };
28239
28240 /* Adjust register allocation order when compiling for Thumb. */
28241
28242 void
28243 arm_order_regs_for_local_alloc (void)
28244 {
28245 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28246 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28247 if (TARGET_THUMB)
28248 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28249 sizeof (thumb_core_reg_alloc_order));
28250 }
28251
28252 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28253
28254 bool
28255 arm_frame_pointer_required (void)
28256 {
28257 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28258 return true;
28259
28260 /* If the function receives nonlocal gotos, it needs to save the frame
28261 pointer in the nonlocal_goto_save_area object. */
28262 if (cfun->has_nonlocal_label)
28263 return true;
28264
28265 /* The frame pointer is required for non-leaf APCS frames. */
28266 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28267 return true;
28268
28269 /* If we are probing the stack in the prologue, we will have a faulting
28270 instruction prior to the stack adjustment and this requires a frame
28271 pointer if we want to catch the exception using the EABI unwinder. */
28272 if (!IS_INTERRUPT (arm_current_func_type ())
28273 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28274 || flag_stack_clash_protection)
28275 && arm_except_unwind_info (&global_options) == UI_TARGET
28276 && cfun->can_throw_non_call_exceptions)
28277 {
28278 HOST_WIDE_INT size = get_frame_size ();
28279
28280 /* That's irrelevant if there is no stack adjustment. */
28281 if (size <= 0)
28282 return false;
28283
28284 /* That's relevant only if there is a stack probe. */
28285 if (crtl->is_leaf && !cfun->calls_alloca)
28286 {
28287 /* We don't have the final size of the frame so adjust. */
28288 size += 32 * UNITS_PER_WORD;
28289 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28290 return true;
28291 }
28292 else
28293 return true;
28294 }
28295
28296 return false;
28297 }
28298
28299 /* Thumb-1 is the only target that lacks conditional execution, so return
28300 true if the target is not Thumb-1. */
28301 static bool
28302 arm_have_conditional_execution (void)
28303 {
28304 return !TARGET_THUMB1;
28305 }
28306
28307 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28308 static HOST_WIDE_INT
28309 arm_vector_alignment (const_tree type)
28310 {
28311 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28312
28313 if (TARGET_AAPCS_BASED)
28314 align = MIN (align, 64);
28315
28316 return align;
28317 }
28318
28319 static void
28320 arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
28321 {
28322 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28323 {
28324 sizes->safe_push (16);
28325 sizes->safe_push (8);
28326 }
28327 }
28328
28329 static bool
28330 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28331 {
28332 /* Vectors which aren't in packed structures will not be less aligned than
28333 the natural alignment of their element type, so this is safe. */
28334 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28335 return !is_packed;
28336
28337 return default_builtin_vector_alignment_reachable (type, is_packed);
28338 }
28339
28340 static bool
28341 arm_builtin_support_vector_misalignment (machine_mode mode,
28342 const_tree type, int misalignment,
28343 bool is_packed)
28344 {
28345 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28346 {
28347 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28348
28349 if (is_packed)
28350 return align == 1;
28351
28352 /* If the misalignment is unknown, we should be able to handle the access
28353 so long as it is not to a member of a packed data structure. */
28354 if (misalignment == -1)
28355 return true;
28356
28357 /* Return true if the misalignment is a multiple of the natural alignment
28358 of the vector's element type. This is probably always going to be
28359 true in practice, since we've already established that this isn't a
28360 packed access. */
28361 return ((misalignment % align) == 0);
28362 }
28363
28364 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28365 is_packed);
28366 }
28367
28368 static void
28369 arm_conditional_register_usage (void)
28370 {
28371 int regno;
28372
28373 if (TARGET_THUMB1 && optimize_size)
28374 {
28375 /* When optimizing for size on Thumb-1, it's better not
28376 to use the HI regs, because of the overhead of
28377 stacking them. */
28378 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28379 fixed_regs[regno] = call_used_regs[regno] = 1;
28380 }
28381
28382 /* The link register can be clobbered by any branch insn,
28383 but we have no way to track that at present, so mark
28384 it as unavailable. */
28385 if (TARGET_THUMB1)
28386 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28387
28388 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28389 {
28390 /* VFPv3 registers are disabled when earlier VFP
28391 versions are selected due to the definition of
28392 LAST_VFP_REGNUM. */
28393 for (regno = FIRST_VFP_REGNUM;
28394 regno <= LAST_VFP_REGNUM; ++ regno)
28395 {
28396 fixed_regs[regno] = 0;
28397 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28398 || regno >= FIRST_VFP_REGNUM + 32;
28399 }
28400 }
28401
28402 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
28403 {
28404 regno = FIRST_IWMMXT_GR_REGNUM;
28405 /* The 2002/10/09 revision of the XScale ABI has wCG0
28406 and wCG1 as call-preserved registers. The 2002/11/21
28407 revision changed this so that all wCG registers are
28408 scratch registers. */
28409 for (regno = FIRST_IWMMXT_GR_REGNUM;
28410 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28411 fixed_regs[regno] = 0;
28412 /* The XScale ABI has wR0 - wR9 as scratch registers,
28413 the rest as call-preserved registers. */
28414 for (regno = FIRST_IWMMXT_REGNUM;
28415 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28416 {
28417 fixed_regs[regno] = 0;
28418 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28419 }
28420 }
28421
28422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28423 {
28424 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28425 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28426 }
28427 else if (TARGET_APCS_STACK)
28428 {
28429 fixed_regs[10] = 1;
28430 call_used_regs[10] = 1;
28431 }
28432 /* -mcaller-super-interworking reserves r11 for calls to
28433 _interwork_r11_call_via_rN(). Making the register global
28434 is an easy way of ensuring that it remains valid for all
28435 calls. */
28436 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28437 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28438 {
28439 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28440 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28441 if (TARGET_CALLER_INTERWORKING)
28442 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28443 }
28444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28445 }
28446
28447 static reg_class_t
28448 arm_preferred_rename_class (reg_class_t rclass)
28449 {
28450 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28451 using GENERAL_REGS.  During the register rename pass we prefer LO_REGS,
28452 which can reduce code size. */
28453 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28454 return LO_REGS;
28455 else
28456 return NO_REGS;
28457 }
28458
28459 /* Compute the attribute "length" of insn "*push_multi".
28460 So this function MUST be kept in sync with that insn pattern. */
28461 int
28462 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28463 {
28464 int i, regno, hi_reg;
28465 int num_saves = XVECLEN (parallel_op, 0);
28466
28467 /* ARM mode. */
28468 if (TARGET_ARM)
28469 return 4;
28470 /* Thumb1 mode. */
28471 if (TARGET_THUMB1)
28472 return 2;
28473
28474 /* Thumb2 mode. */
28475 regno = REGNO (first_op);
28476 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings only if the
28477 register list fits in 8 bits.  Normally this means all registers in the
28478 list must be LO_REGS, that is R0-R7.  If any HI_REGS are used, then we
28479 must use the 32-bit encodings, with one exception: PUSH may use the
28480 16-bit encoding when LR (a HI_REG) is in the list. */
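/* Illustrative examples: "push {r0, r4, lr}" can use the 16-bit encoding
   (LR being the one permitted high register), while "push {r0, r8}" needs
   the 32-bit encoding because r8 is a HI_REG.  */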
28481 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28482 for (i = 1; i < num_saves && !hi_reg; i++)
28483 {
28484 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28485 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28486 }
28487
28488 if (!hi_reg)
28489 return 2;
28490 return 4;
28491 }
28492
28493 /* Compute the attribute "length" of insn. Currently, this function is used
28494 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28495 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28496 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28497 true if OPERANDS contains an insn that explicitly updates the base register. */
28498
28499 int
28500 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28501 {
28502 /* ARM mode. */
28503 if (TARGET_ARM)
28504 return 4;
28505 /* Thumb1 mode. */
28506 if (TARGET_THUMB1)
28507 return 2;
28508
28509 rtx parallel_op = operands[0];
28510 /* Start at the index of the last element of the PARALLEL. */
28511 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28512 /* Initialize to the base register's number. */
28513 unsigned regno = REGNO (operands[1]);
28514 /* Skip the return and write back patterns.
28515 We only need the register pop patterns for the analysis below. */
28516 unsigned first_indx = 0;
28517 first_indx += return_pc ? 1 : 0;
28518 first_indx += write_back_p ? 1 : 0;
28519
28520 /* A pop operation can be done through LDM or POP.  If the base register is SP
28521 and write back is used, then an LDM is an alias of POP. */
28522 bool pop_p = (regno == SP_REGNUM && write_back_p);
28523 bool ldm_p = !pop_p;
28524
28525 /* Check base register for LDM. */
28526 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28527 return 4;
28528
28529 /* Check each register in the list. */
28530 for (; indx >= first_indx; indx--)
28531 {
28532 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28533 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28534 comment in arm_attr_length_push_multi. */
28535 if (REGNO_REG_CLASS (regno) == HI_REGS
28536 && (regno != PC_REGNUM || ldm_p))
28537 return 4;
28538 }
28539
28540 return 2;
28541 }
28542
28543 /* Compute the number of instructions emitted by output_move_double. */
28544 int
28545 arm_count_output_move_double_insns (rtx *operands)
28546 {
28547 int count;
28548 rtx ops[2];
28549 /* output_move_double may modify the operands array, so call it
28550 here on a copy of the array. */
28551 ops[0] = operands[0];
28552 ops[1] = operands[1];
28553 output_move_double (ops, false, &count);
28554 return count;
28555 }
28556
28557 /* Same as above, but operands are a register/memory pair in SImode.
28558 Assumes operands has the base register in position 0 and memory in position
28559 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
28560 int
28561 arm_count_ldrdstrd_insns (rtx *operands, bool load)
28562 {
28563 int count;
28564 rtx ops[2];
28565 int regnum, memnum;
28566 if (load)
28567 regnum = 0, memnum = 1;
28568 else
28569 regnum = 1, memnum = 0;
28570 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
28571 ops[memnum] = adjust_address (operands[2], DImode, 0);
28572 output_move_double (ops, false, &count);
28573 return count;
28574 }
28575
28576
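/* If OPERAND is a CONST_DOUBLE equal to 1 / 2**n for some n in [0, 31],
   return n; otherwise return 0.  Used, like vfp3_const_double_for_bits
   below, by the fixed-point conversion patterns.  */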
28577 int
28578 vfp3_const_double_for_fract_bits (rtx operand)
28579 {
28580 REAL_VALUE_TYPE r0;
28581
28582 if (!CONST_DOUBLE_P (operand))
28583 return 0;
28584
28585 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28586 if (exact_real_inverse (DFmode, &r0)
28587 && !REAL_VALUE_NEGATIVE (r0))
28588 {
28589 if (exact_real_truncate (DFmode, &r0))
28590 {
28591 HOST_WIDE_INT value = real_to_integer (&r0);
28592 value = value & 0xffffffff;
28593 if ((value != 0) && ( (value & (value - 1)) == 0))
28594 {
28595 int ret = exact_log2 (value);
28596 gcc_assert (IN_RANGE (ret, 0, 31));
28597 return ret;
28598 }
28599 }
28600 }
28601 return 0;
28602 }
28603
28604 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28605 log2 is in [1, 32], return that log2. Otherwise return -1.
28606 This is used in the patterns for vcvt.s32.f32 floating-point to
28607 fixed-point conversions. */
28608
28609 int
28610 vfp3_const_double_for_bits (rtx x)
28611 {
28612 const REAL_VALUE_TYPE *r;
28613
28614 if (!CONST_DOUBLE_P (x))
28615 return -1;
28616
28617 r = CONST_DOUBLE_REAL_VALUE (x);
28618
28619 if (REAL_VALUE_NEGATIVE (*r)
28620 || REAL_VALUE_ISNAN (*r)
28621 || REAL_VALUE_ISINF (*r)
28622 || !real_isinteger (r, SFmode))
28623 return -1;
28624
28625 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28626
28627 /* The exact_log2 above will have returned -1 if this is
28628 not an exact log2. */
28629 if (!IN_RANGE (hwint, 1, 32))
28630 return -1;
28631
28632 return hwint;
28633 }
28634
28635 \f
28636 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28637
28638 static void
28639 arm_pre_atomic_barrier (enum memmodel model)
28640 {
28641 if (need_atomic_barrier_p (model, true))
28642 emit_insn (gen_memory_barrier ());
28643 }
28644
28645 static void
28646 arm_post_atomic_barrier (enum memmodel model)
28647 {
28648 if (need_atomic_barrier_p (model, false))
28649 emit_insn (gen_memory_barrier ());
28650 }
28651
28652 /* Emit the load-exclusive and store-exclusive instructions.
28653 Use acquire and release versions if necessary. */
28654
28655 static void
28656 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28657 {
28658 rtx (*gen) (rtx, rtx);
28659
28660 if (acq)
28661 {
28662 switch (mode)
28663 {
28664 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28665 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28666 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28667 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28668 default:
28669 gcc_unreachable ();
28670 }
28671 }
28672 else
28673 {
28674 switch (mode)
28675 {
28676 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28677 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28678 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28679 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28680 default:
28681 gcc_unreachable ();
28682 }
28683 }
28684
28685 emit_insn (gen (rval, mem));
28686 }
28687
28688 static void
28689 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28690 rtx mem, bool rel)
28691 {
28692 rtx (*gen) (rtx, rtx, rtx);
28693
28694 if (rel)
28695 {
28696 switch (mode)
28697 {
28698 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28699 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28700 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28701 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28702 default:
28703 gcc_unreachable ();
28704 }
28705 }
28706 else
28707 {
28708 switch (mode)
28709 {
28710 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28711 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28712 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28713 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28714 default:
28715 gcc_unreachable ();
28716 }
28717 }
28718
28719 emit_insn (gen (bval, rval, mem));
28720 }
28721
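/* As an illustration, for SImode these two helpers typically expand to
   LDREX/STREX, or to LDAEX/STLEX when the acquire/release forms are
   requested and available (TARGET_HAVE_LDACQ); QImode, HImode and DImode
   use the corresponding byte, halfword and doubleword variants.  */
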
28722 /* Mark the previous jump instruction as unlikely. */
28723
28724 static void
28725 emit_unlikely_jump (rtx insn)
28726 {
28727 rtx_insn *jump = emit_jump_insn (insn);
28728 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28729 }
28730
28731 /* Expand a compare and swap pattern. */
28732
28733 void
28734 arm_expand_compare_and_swap (rtx operands[])
28735 {
28736 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28737 machine_mode mode, cmp_mode;
28738
28739 bval = operands[0];
28740 rval = operands[1];
28741 mem = operands[2];
28742 oldval = operands[3];
28743 newval = operands[4];
28744 is_weak = operands[5];
28745 mod_s = operands[6];
28746 mod_f = operands[7];
28747 mode = GET_MODE (mem);
28748
28749 /* Normally the succ memory model must be stronger than fail, but in the
28750 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28751 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28752
28753 if (TARGET_HAVE_LDACQ
28754 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28755 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28756 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28757
28758 switch (mode)
28759 {
28760 case E_QImode:
28761 case E_HImode:
28762 /* For narrow modes, we're going to perform the comparison in SImode,
28763 so do the zero-extension now. */
28764 rval = gen_reg_rtx (SImode);
28765 oldval = convert_modes (SImode, mode, oldval, true);
28766 /* FALLTHRU */
28767
28768 case E_SImode:
28769 /* Force the value into a register if needed. We waited until after
28770 the zero-extension above to do this properly. */
28771 if (!arm_add_operand (oldval, SImode))
28772 oldval = force_reg (SImode, oldval);
28773 break;
28774
28775 case E_DImode:
28776 if (!cmpdi_operand (oldval, mode))
28777 oldval = force_reg (mode, oldval);
28778 break;
28779
28780 default:
28781 gcc_unreachable ();
28782 }
28783
28784 if (TARGET_THUMB1)
28785 cmp_mode = E_SImode;
28786 else
28787 cmp_mode = CC_Zmode;
28788
28789 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28790 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
28791 oldval, newval, is_weak, mod_s, mod_f));
28792
28793 if (mode == QImode || mode == HImode)
28794 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28795
28796 /* In all cases, we arrange for success to be signaled by Z being set.
28797 This arrangement allows the boolean result to be used directly
28798 in a subsequent branch, post optimization. For Thumb-1 targets, the
28799 boolean negation of the result is also stored in bval because the Thumb-1
28800 backend lacks dependency tracking for the CC flag, flag-setting not
28801 being represented at the RTL level. */
28802 if (TARGET_THUMB1)
28803 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28804 else
28805 {
28806 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28807 emit_insn (gen_rtx_SET (bval, x));
28808 }
28809 }
28810
28811 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28812 another memory store between the load-exclusive and store-exclusive can
28813 reset the monitor from Exclusive to Open state. This means we must wait
28814 until after reload to split the pattern, lest we get a register spill in
28815 the middle of the atomic sequence. Success of the compare and swap is
28816 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28817 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28818 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28819
28820 void
28821 arm_split_compare_and_swap (rtx operands[])
28822 {
28823 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28824 machine_mode mode;
28825 enum memmodel mod_s, mod_f;
28826 bool is_weak;
28827 rtx_code_label *label1, *label2;
28828 rtx x, cond;
28829
28830 rval = operands[1];
28831 mem = operands[2];
28832 oldval = operands[3];
28833 newval = operands[4];
28834 is_weak = (operands[5] != const0_rtx);
28835 mod_s_rtx = operands[6];
28836 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28837 mod_f = memmodel_from_int (INTVAL (operands[7]));
28838 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28839 mode = GET_MODE (mem);
28840
28841 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28842
28843 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28844 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28845
28846 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28847 a full barrier is emitted after the store-release. */
28848 if (is_armv8_sync)
28849 use_acquire = false;
28850
28851 /* Checks whether a barrier is needed and emits one accordingly. */
28852 if (!(use_acquire || use_release))
28853 arm_pre_atomic_barrier (mod_s);
28854
28855 label1 = NULL;
28856 if (!is_weak)
28857 {
28858 label1 = gen_label_rtx ();
28859 emit_label (label1);
28860 }
28861 label2 = gen_label_rtx ();
28862
28863 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28864
28865 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
28866 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28867 if (TARGET_32BIT)
28868 {
28869 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28870 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28871 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28872 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28873 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28874 }
28875 else
28876 {
28877 emit_move_insn (neg_bval, const1_rtx);
28878 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28879 if (thumb1_cmpneg_operand (oldval, SImode))
28880 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28881 label2, cond));
28882 else
28883 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28884 }
28885
28886 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28887
28888 /* Weak or strong, we want EQ to be true for success, so that we
28889 match the flags that we got from the compare above. */
28890 if (TARGET_32BIT)
28891 {
28892 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28893 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28894 emit_insn (gen_rtx_SET (cond, x));
28895 }
28896
28897 if (!is_weak)
28898 {
28899 /* Z is set to boolean value of !neg_bval, as required to communicate
28900 with arm_expand_compare_and_swap. */
28901 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28902 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28903 }
28904
28905 if (!is_mm_relaxed (mod_f))
28906 emit_label (label2);
28907
28908 /* Checks whether a barrier is needed and emits one accordingly. */
28909 if (is_armv8_sync
28910 || !(use_acquire || use_release))
28911 arm_post_atomic_barrier (mod_s);
28912
28913 if (is_mm_relaxed (mod_f))
28914 emit_label (label2);
28915 }
28916
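/* As a rough sketch (not emitted verbatim), a strong SImode compare-and-swap
   without extra barriers is split into a loop of the form:

       loop:  ldrex   rval, [mem]
              cmp     rval, oldval
              bne     done                   @ Z clear signals failure
              strex   neg_bval, newval, [mem]
              cmp     neg_bval, #0
              bne     loop                   @ retry if the store-exclusive failed
       done:

   A weak compare-and-swap omits the backward branch and simply reports the
   result of the single attempt.  */
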
28917 /* Split an atomic operation pattern. Operation is given by CODE and is one
28918 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28919 operation). Operation is performed on the content at MEM and on VALUE
28920 following the memory model MODEL_RTX. The content at MEM before and after
28921 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28922 success of the operation is returned in COND. Using a scratch register or
28923 an operand register for these determines what result is returned for that
28924 pattern. */
28925
28926 void
28927 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28928 rtx value, rtx model_rtx, rtx cond)
28929 {
28930 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28931 machine_mode mode = GET_MODE (mem);
28932 machine_mode wmode = (mode == DImode ? DImode : SImode);
28933 rtx_code_label *label;
28934 bool all_low_regs, bind_old_new;
28935 rtx x;
28936
28937 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28938
28939 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28940 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28941
28942 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28943 a full barrier is emitted after the store-release. */
28944 if (is_armv8_sync)
28945 use_acquire = false;
28946
28947 /* Checks whether a barrier is needed and emits one accordingly. */
28948 if (!(use_acquire || use_release))
28949 arm_pre_atomic_barrier (model);
28950
28951 label = gen_label_rtx ();
28952 emit_label (label);
28953
28954 if (new_out)
28955 new_out = gen_lowpart (wmode, new_out);
28956 if (old_out)
28957 old_out = gen_lowpart (wmode, old_out);
28958 else
28959 old_out = new_out;
28960 value = simplify_gen_subreg (wmode, value, mode, 0);
28961
28962 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28963
28964 /* Does the operation require destination and first operand to use the same
28965 register? This is decided by register constraints of relevant insn
28966 patterns in thumb1.md. */
28967 gcc_assert (!new_out || REG_P (new_out));
28968 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28969 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28970 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28971 bind_old_new =
28972 (TARGET_THUMB1
28973 && code != SET
28974 && code != MINUS
28975 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28976
28977 /* We want to return the old value while putting the result of the operation
28978 in the same register as the old value so copy the old value over to the
28979 destination register and use that register for the operation. */
28980 if (old_out && bind_old_new)
28981 {
28982 emit_move_insn (new_out, old_out);
28983 old_out = new_out;
28984 }
28985
28986 switch (code)
28987 {
28988 case SET:
28989 new_out = value;
28990 break;
28991
28992 case NOT:
28993 x = gen_rtx_AND (wmode, old_out, value);
28994 emit_insn (gen_rtx_SET (new_out, x));
28995 x = gen_rtx_NOT (wmode, new_out);
28996 emit_insn (gen_rtx_SET (new_out, x));
28997 break;
28998
28999 case MINUS:
29000 if (CONST_INT_P (value))
29001 {
29002 value = GEN_INT (-INTVAL (value));
29003 code = PLUS;
29004 }
29005 /* FALLTHRU */
29006
29007 case PLUS:
29008 if (mode == DImode)
29009 {
29010 /* DImode plus/minus need to clobber flags. */
29011 /* The adddi3 and subdi3 patterns are incorrectly written so that
29012 they require matching operands, even when we could easily support
29013 three operands. Thankfully, this can be fixed up post-splitting,
29014 as the individual add+adc patterns do accept three operands and
29015 post-reload cprop can make these moves go away. */
29016 emit_move_insn (new_out, old_out);
29017 if (code == PLUS)
29018 x = gen_adddi3 (new_out, new_out, value);
29019 else
29020 x = gen_subdi3 (new_out, new_out, value);
29021 emit_insn (x);
29022 break;
29023 }
29024 /* FALLTHRU */
29025
29026 default:
29027 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29028 emit_insn (gen_rtx_SET (new_out, x));
29029 break;
29030 }
29031
29032 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29033 use_release);
29034
29035 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29036 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29037
29038 /* Checks whether a barrier is needed and emits one accordingly. */
29039 if (is_armv8_sync
29040 || !(use_acquire || use_release))
29041 arm_post_atomic_barrier (model);
29042 }
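
/* As a rough sketch, a relaxed SImode fetch-and-add split by the function
   above looks like:

       loop:  ldrex   old_out, [mem]
              add     new_out, old_out, value
              strex   cond, new_out, [mem]
              cmp     cond, #0
              bne     loop

   with barriers and/or acquire/release forms added as dictated by the
   memory model.  */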
29043 \f
29044 #define MAX_VECT_LEN 16
29045
29046 struct expand_vec_perm_d
29047 {
29048 rtx target, op0, op1;
29049 vec_perm_indices perm;
29050 machine_mode vmode;
29051 bool one_vector_p;
29052 bool testing_p;
29053 };
29054
29055 /* Generate a variable permutation. */
29056
29057 static void
29058 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29059 {
29060 machine_mode vmode = GET_MODE (target);
29061 bool one_vector_p = rtx_equal_p (op0, op1);
29062
29063 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29064 gcc_checking_assert (GET_MODE (op0) == vmode);
29065 gcc_checking_assert (GET_MODE (op1) == vmode);
29066 gcc_checking_assert (GET_MODE (sel) == vmode);
29067 gcc_checking_assert (TARGET_NEON);
29068
29069 if (one_vector_p)
29070 {
29071 if (vmode == V8QImode)
29072 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29073 else
29074 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29075 }
29076 else
29077 {
29078 rtx pair;
29079
29080 if (vmode == V8QImode)
29081 {
29082 pair = gen_reg_rtx (V16QImode);
29083 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29084 pair = gen_lowpart (TImode, pair);
29085 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29086 }
29087 else
29088 {
29089 pair = gen_reg_rtx (OImode);
29090 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29091 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29092 }
29093 }
29094 }
29095
29096 void
29097 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29098 {
29099 machine_mode vmode = GET_MODE (target);
29100 unsigned int nelt = GET_MODE_NUNITS (vmode);
29101 bool one_vector_p = rtx_equal_p (op0, op1);
29102 rtx mask;
29103
29104 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29105 numbering of elements for big-endian, we must reverse the order. */
29106 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29107
29108 /* The VTBL instruction does not use a modulo index, so we must take care
29109 of that ourselves. */
29110 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29111 mask = gen_const_vec_duplicate (vmode, mask);
29112 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29113
29114 arm_expand_vec_perm_1 (target, op0, op1, sel);
29115 }
29116
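/* For example, with a single V8QImode input vector the selector is ANDed
   with 7, so an out-of-range index such as 9 selects lane 1; with two input
   vectors the mask is 15 and indexes 8-15 select lanes from the second
   vector.  The masking supplies the modulo behaviour VEC_PERM_EXPR requires,
   because VTBL zeroes out-of-range lanes instead of wrapping.  */
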
29117 /* Map lane ordering between the architectural lane order and the GCC lane order,
29118 taking the ABI into account. See the comment above output_move_neon for details. */
29119
29120 static int
29121 neon_endian_lane_map (machine_mode mode, int lane)
29122 {
29123 if (BYTES_BIG_ENDIAN)
29124 {
29125 int nelems = GET_MODE_NUNITS (mode);
29126 /* Reverse lane order. */
29127 lane = (nelems - 1 - lane);
29128 /* Reverse D register order, to match ABI. */
29129 if (GET_MODE_SIZE (mode) == 16)
29130 lane = lane ^ (nelems / 2);
29131 }
29132 return lane;
29133 }
29134
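/* For example, on a big-endian target lane 0 of a V4SImode value maps to
   lane 1: the lane order is reversed (0 -> 3) and, for a 16-byte quad
   register, the two D-register halves are swapped (3 ^ 2 -> 1).  On
   little-endian targets the mapping is the identity.  */
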
29135 /* Some permutations index into pairs of vectors; this is a helper function
29136 to map indexes into those pairs of vectors. */
29137
29138 static int
29139 neon_pair_endian_lane_map (machine_mode mode, int lane)
29140 {
29141 int nelem = GET_MODE_NUNITS (mode);
29142 if (BYTES_BIG_ENDIAN)
29143 lane =
29144 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29145 return lane;
29146 }
29147
29148 /* Generate or test for an insn that supports a constant permutation. */
29149
29150 /* Recognize patterns for the VUZP insns. */
29151
29152 static bool
29153 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29154 {
29155 unsigned int i, odd, mask, nelt = d->perm.length ();
29156 rtx out0, out1, in0, in1;
29157 int first_elem;
29158 int swap_nelt;
29159
29160 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29161 return false;
29162
29163 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29164 big-endian pattern on 64-bit vectors, so we correct for that. */
29165 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29166 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29167
29168 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29169
29170 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29171 odd = 0;
29172 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29173 odd = 1;
29174 else
29175 return false;
29176 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29177
29178 for (i = 0; i < nelt; i++)
29179 {
29180 unsigned elt =
29181 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29182 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29183 return false;
29184 }
29185
29186 /* Success! */
29187 if (d->testing_p)
29188 return true;
29189
29190 in0 = d->op0;
29191 in1 = d->op1;
29192 if (swap_nelt != 0)
29193 std::swap (in0, in1);
29194
29195 out0 = d->target;
29196 out1 = gen_reg_rtx (d->vmode);
29197 if (odd)
29198 std::swap (out0, out1);
29199
29200 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29201 return true;
29202 }
29203
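/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even lanes of both
   inputs and is matched here as a VUZP with odd == 0; the selector
   { 1, 3, 5, ..., 15 } is the corresponding odd-lane case.  */
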
29204 /* Recognize patterns for the VZIP insns. */
29205
29206 static bool
29207 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29208 {
29209 unsigned int i, high, mask, nelt = d->perm.length ();
29210 rtx out0, out1, in0, in1;
29211 int first_elem;
29212 bool is_swapped;
29213
29214 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29215 return false;
29216
29217 is_swapped = BYTES_BIG_ENDIAN;
29218
29219 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29220
29221 high = nelt / 2;
29222 if (first_elem == neon_endian_lane_map (d->vmode, high))
29223 ;
29224 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29225 high = 0;
29226 else
29227 return false;
29228 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29229
29230 for (i = 0; i < nelt / 2; i++)
29231 {
29232 unsigned elt =
29233 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29234 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29235 != elt)
29236 return false;
29237 elt =
29238 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29239 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29240 != elt)
29241 return false;
29242 }
29243
29244 /* Success! */
29245 if (d->testing_p)
29246 return true;
29247
29248 in0 = d->op0;
29249 in1 = d->op1;
29250 if (is_swapped)
29251 std::swap (in0, in1);
29252
29253 out0 = d->target;
29254 out1 = gen_reg_rtx (d->vmode);
29255 if (high)
29256 std::swap (out0, out1);
29257
29258 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29259 return true;
29260 }
29261
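/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of
   the inputs and is matched here as a VZIP with high == 0;
   { 4, 12, 5, 13, 6, 14, 7, 15 } is the high-half counterpart.  */
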
29262 /* Recognize patterns for the VREV insns. */
29263 static bool
29264 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29265 {
29266 unsigned int i, j, diff, nelt = d->perm.length ();
29267 rtx (*gen) (machine_mode, rtx, rtx);
29268
29269 if (!d->one_vector_p)
29270 return false;
29271
29272 diff = d->perm[0];
29273 switch (diff)
29274 {
29275 case 7:
29276 switch (d->vmode)
29277 {
29278 case E_V16QImode:
29279 case E_V8QImode:
29280 gen = gen_neon_vrev64;
29281 break;
29282 default:
29283 return false;
29284 }
29285 break;
29286 case 3:
29287 switch (d->vmode)
29288 {
29289 case E_V16QImode:
29290 case E_V8QImode:
29291 gen = gen_neon_vrev32;
29292 break;
29293 case E_V8HImode:
29294 case E_V4HImode:
29295 case E_V8HFmode:
29296 case E_V4HFmode:
29297 gen = gen_neon_vrev64;
29298 break;
29299 default:
29300 return false;
29301 }
29302 break;
29303 case 1:
29304 switch (d->vmode)
29305 {
29306 case E_V16QImode:
29307 case E_V8QImode:
29308 gen = gen_neon_vrev16;
29309 break;
29310 case E_V8HImode:
29311 case E_V4HImode:
29312 gen = gen_neon_vrev32;
29313 break;
29314 case E_V4SImode:
29315 case E_V2SImode:
29316 case E_V4SFmode:
29317 case E_V2SFmode:
29318 gen = gen_neon_vrev64;
29319 break;
29320 default:
29321 return false;
29322 }
29323 break;
29324 default:
29325 return false;
29326 }
29327
29328 for (i = 0; i < nelt ; i += diff + 1)
29329 for (j = 0; j <= diff; j += 1)
29330 {
29331 /* This is guaranteed to hold because diff is 7, 3 or 1,
29332 so there are always enough elements left in the
29333 permutation to cover i + j. A permutation implying any
29334 other value of diff means that something has gone wrong
29335 by the time we get here. */
29336 gcc_assert (i + j < nelt);
29337 if (d->perm[i + j] != i + diff - j)
29338 return false;
29339 }
29340
29341 /* Success! */
29342 if (d->testing_p)
29343 return true;
29344
29345 emit_insn (gen (d->vmode, d->target, d->op0));
29346 return true;
29347 }
29348
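/* For example, the single-operand V8QImode permutation
   { 1, 0, 3, 2, 5, 4, 7, 6 } (diff == 1) is matched as a VREV16, while
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) is matched as a VREV32, both
   operating on byte elements.  */
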
29349 /* Recognize patterns for the VTRN insns. */
29350
29351 static bool
29352 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29353 {
29354 unsigned int i, odd, mask, nelt = d->perm.length ();
29355 rtx out0, out1, in0, in1;
29356
29357 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29358 return false;
29359
29360 /* Note that these are little-endian tests. Adjust for big-endian later. */
29361 if (d->perm[0] == 0)
29362 odd = 0;
29363 else if (d->perm[0] == 1)
29364 odd = 1;
29365 else
29366 return false;
29367 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29368
29369 for (i = 0; i < nelt; i += 2)
29370 {
29371 if (d->perm[i] != i + odd)
29372 return false;
29373 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29374 return false;
29375 }
29376
29377 /* Success! */
29378 if (d->testing_p)
29379 return true;
29380
29381 in0 = d->op0;
29382 in1 = d->op1;
29383 if (BYTES_BIG_ENDIAN)
29384 {
29385 std::swap (in0, in1);
29386 odd = !odd;
29387 }
29388
29389 out0 = d->target;
29390 out1 = gen_reg_rtx (d->vmode);
29391 if (odd)
29392 std::swap (out0, out1);
29393
29394 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
29395 return true;
29396 }
29397
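/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 8, 2, 10, 4, 12, 6, 14 } (odd == 0) pairs up the even
   lanes of the two inputs and is matched here as a VTRN;
   { 1, 9, 3, 11, 5, 13, 7, 15 } is the odd-lane form.  */
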
29398 /* Recognize patterns for the VEXT insns. */
29399
29400 static bool
29401 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29402 {
29403 unsigned int i, nelt = d->perm.length ();
29404 rtx offset;
29405
29406 unsigned int location;
29407
29408 unsigned int next = d->perm[0] + 1;
29409
29410 /* TODO: Handle GCC's numbering of elements for big-endian. */
29411 if (BYTES_BIG_ENDIAN)
29412 return false;
29413
29414 /* Check if the extracted indexes are increasing by one. */
29415 for (i = 1; i < nelt; next++, i++)
29416 {
29417 /* If we hit the most significant element of the 2nd vector in
29418 the previous iteration, no need to test further. */
29419 if (next == 2 * nelt)
29420 return false;
29421
29422 /* If we are operating on only one vector, it could be a
29423 rotation. If there are only two elements of size < 64 bits,
29424 let arm_evpc_neon_vrev catch it. */
29425 if (d->one_vector_p && (next == nelt))
29426 {
29427 if ((nelt == 2) && (d->vmode != V2DImode))
29428 return false;
29429 else
29430 next = 0;
29431 }
29432
29433 if (d->perm[i] != next)
29434 return false;
29435 }
29436
29437 location = d->perm[0];
29438
29439 /* Success! */
29440 if (d->testing_p)
29441 return true;
29442
29443 offset = GEN_INT (location);
29444
29445 if (d->vmode == E_DImode)
29446 return false;
29447
29448 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
29449 return true;
29450 }
29451
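/* For example, the two-operand V8QImode permutation
   { 1, 2, 3, 4, 5, 6, 7, 8 } extracts a window starting one byte into the
   concatenation of the inputs and is matched here as a VEXT with an
   immediate offset of 1.  */
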
29452 /* The NEON VTBL instruction is a fully variable permutation that's even
29453 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29454 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29455 can do slightly better by expanding this as a constant where we don't
29456 have to apply a mask. */
29457
29458 static bool
29459 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29460 {
29461 rtx rperm[MAX_VECT_LEN], sel;
29462 machine_mode vmode = d->vmode;
29463 unsigned int i, nelt = d->perm.length ();
29464
29465 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29466 numbering of elements for big-endian, we must reverse the order. */
29467 if (BYTES_BIG_ENDIAN)
29468 return false;
29469
29470 if (d->testing_p)
29471 return true;
29472
29473 /* Generic code will try the constant permutation twice: once with the
29474 original mode and again with the elements lowered to QImode.
29475 So wait, and don't do the selector expansion ourselves. */
29476 if (vmode != V8QImode && vmode != V16QImode)
29477 return false;
29478
29479 for (i = 0; i < nelt; ++i)
29480 rperm[i] = GEN_INT (d->perm[i]);
29481 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29482 sel = force_reg (vmode, sel);
29483
29484 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29485 return true;
29486 }
29487
29488 static bool
29489 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29490 {
29491 /* Check if the input mask matches vext before reordering the
29492 operands. */
29493 if (TARGET_NEON)
29494 if (arm_evpc_neon_vext (d))
29495 return true;
29496
29497 /* The pattern matching functions above are written to look for a small
29498 number to begin the sequence (0, 1, N/2). If we begin with an index
29499 from the second operand, we can swap the operands. */
29500 unsigned int nelt = d->perm.length ();
29501 if (d->perm[0] >= nelt)
29502 {
29503 d->perm.rotate_inputs (1);
29504 std::swap (d->op0, d->op1);
29505 }
29506
29507 if (TARGET_NEON)
29508 {
29509 if (arm_evpc_neon_vuzp (d))
29510 return true;
29511 if (arm_evpc_neon_vzip (d))
29512 return true;
29513 if (arm_evpc_neon_vrev (d))
29514 return true;
29515 if (arm_evpc_neon_vtrn (d))
29516 return true;
29517 return arm_evpc_neon_vtbl (d);
29518 }
29519 return false;
29520 }
29521
29522 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29523
29524 static bool
29525 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29526 const vec_perm_indices &sel)
29527 {
29528 struct expand_vec_perm_d d;
29529 int i, nelt, which;
29530
29531 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29532 return false;
29533
29534 d.target = target;
29535 d.op0 = op0;
29536 d.op1 = op1;
29537
29538 d.vmode = vmode;
29539 gcc_assert (VECTOR_MODE_P (d.vmode));
29540 d.testing_p = !target;
29541
29542 nelt = GET_MODE_NUNITS (d.vmode);
29543 for (i = which = 0; i < nelt; ++i)
29544 {
29545 int ei = sel[i] & (2 * nelt - 1);
29546 which |= (ei < nelt ? 1 : 2);
29547 }
29548
29549 switch (which)
29550 {
29551 default:
29552 gcc_unreachable();
29553
29554 case 3:
29555 d.one_vector_p = false;
29556 if (d.testing_p || !rtx_equal_p (op0, op1))
29557 break;
29558
29559 /* The elements of PERM do not suggest that only the first operand
29560 is used, but both operands are identical. Allow easier matching
29561 of the permutation by folding the permutation into the single
29562 input vector. */
29563 /* FALLTHRU */
29564 case 2:
29565 d.op0 = op1;
29566 d.one_vector_p = true;
29567 break;
29568
29569 case 1:
29570 d.op1 = op0;
29571 d.one_vector_p = true;
29572 break;
29573 }
29574
29575 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29576
29577 if (!d.testing_p)
29578 return arm_expand_vec_perm_const_1 (&d);
29579
29580 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29581 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29582 if (!d.one_vector_p)
29583 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29584
29585 start_sequence ();
29586 bool ret = arm_expand_vec_perm_const_1 (&d);
29587 end_sequence ();
29588
29589 return ret;
29590 }
29591
29592 bool
29593 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29594 {
29595 /* If we are soft float and either have LDRD or the mode fits in a
29596 single word, then all auto-increment forms are OK. */
29597 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29598 return true;
29599
29600 switch (code)
29601 {
29602 /* Post-increment and pre-decrement are supported for all
29603 instruction forms except vector forms. */
29604 case ARM_POST_INC:
29605 case ARM_PRE_DEC:
29606 if (VECTOR_MODE_P (mode))
29607 {
29608 if (code != ARM_PRE_DEC)
29609 return true;
29610 else
29611 return false;
29612 }
29613
29614 return true;
29615
29616 case ARM_POST_DEC:
29617 case ARM_PRE_INC:
29618 /* Without LDRD and mode size greater than
29619 word size, there is no point in auto-incrementing
29620 because ldm and stm will not have these forms. */
29621 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29622 return false;
29623
29624 /* Vector and floating point modes do not support
29625 these auto increment forms. */
29626 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29627 return false;
29628
29629 return true;
29630
29631 default:
29632 return false;
29633
29634 }
29635
29636 return false;
29637 }
29638
29639 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29640 on ARM, since we know that shifts by negative amounts are no-ops.
29641 Additionally, the default expansion code is not available or suitable
29642 for post-reload insn splits (this can occur when the register allocator
29643 chooses not to do a shift in NEON).
29644
29645 This function is used in both initial expand and post-reload splits, and
29646 handles all kinds of 64-bit shifts.
29647
29648 Input requirements:
29649 - It is safe for the input and output to be the same register, but
29650 early-clobber rules apply for the shift amount and scratch registers.
29651 - Shift by register requires both scratch registers. In all other cases
29652 the scratch registers may be NULL.
29653 - Ashiftrt by a register also clobbers the CC register. */
29654 void
29655 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29656 rtx amount, rtx scratch1, rtx scratch2)
29657 {
29658 rtx out_high = gen_highpart (SImode, out);
29659 rtx out_low = gen_lowpart (SImode, out);
29660 rtx in_high = gen_highpart (SImode, in);
29661 rtx in_low = gen_lowpart (SImode, in);
29662
29663 /* Terminology:
29664 in = the register pair containing the input value.
29665 out = the destination register pair.
29666 up = the high- or low-part of each pair.
29667 down = the opposite part to "up".
29668 In a shift, we can consider bits to shift from "up"-stream to
29669 "down"-stream, so in a left-shift "up" is the low-part and "down"
29670 is the high-part of each register pair. */
29671
29672 rtx out_up = code == ASHIFT ? out_low : out_high;
29673 rtx out_down = code == ASHIFT ? out_high : out_low;
29674 rtx in_up = code == ASHIFT ? in_low : in_high;
29675 rtx in_down = code == ASHIFT ? in_high : in_low;
29676
29677 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29678 gcc_assert (out
29679 && (REG_P (out) || GET_CODE (out) == SUBREG)
29680 && GET_MODE (out) == DImode);
29681 gcc_assert (in
29682 && (REG_P (in) || GET_CODE (in) == SUBREG)
29683 && GET_MODE (in) == DImode);
29684 gcc_assert (amount
29685 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29686 && GET_MODE (amount) == SImode)
29687 || CONST_INT_P (amount)));
29688 gcc_assert (scratch1 == NULL
29689 || (GET_CODE (scratch1) == SCRATCH)
29690 || (GET_MODE (scratch1) == SImode
29691 && REG_P (scratch1)));
29692 gcc_assert (scratch2 == NULL
29693 || (GET_CODE (scratch2) == SCRATCH)
29694 || (GET_MODE (scratch2) == SImode
29695 && REG_P (scratch2)));
29696 gcc_assert (!REG_P (out) || !REG_P (amount)
29697 || !HARD_REGISTER_P (out)
29698 || (REGNO (out) != REGNO (amount)
29699 && REGNO (out) + 1 != REGNO (amount)));
29700
29701 /* Macros to make following code more readable. */
29702 #define SUB_32(DEST,SRC) \
29703 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29704 #define RSB_32(DEST,SRC) \
29705 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29706 #define SUB_S_32(DEST,SRC) \
29707 gen_addsi3_compare0 ((DEST), (SRC), \
29708 GEN_INT (-32))
29709 #define SET(DEST,SRC) \
29710 gen_rtx_SET ((DEST), (SRC))
29711 #define SHIFT(CODE,SRC,AMOUNT) \
29712 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29713 #define LSHIFT(CODE,SRC,AMOUNT) \
29714 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29715 SImode, (SRC), (AMOUNT))
29716 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29717 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29718 SImode, (SRC), (AMOUNT))
29719 #define ORR(A,B) \
29720 gen_rtx_IOR (SImode, (A), (B))
29721 #define BRANCH(COND,LABEL) \
29722 gen_arm_cond_branch ((LABEL), \
29723 gen_rtx_ ## COND (CCmode, cc_reg, \
29724 const0_rtx), \
29725 cc_reg)
29726
29727 /* Shifts by register and shifts by constant are handled separately. */
29728 if (CONST_INT_P (amount))
29729 {
29730 /* We have a shift-by-constant. */
29731
29732 /* First, handle out-of-range shift amounts.
29733 In both cases we try to match the result an ARM instruction in a
29734 shift-by-register would give. This helps reduce execution
29735 differences between optimization levels, but it won't stop other
29736 parts of the compiler doing different things. This is "undefined
29737 behavior", in any case. */
29738 if (INTVAL (amount) <= 0)
29739 emit_insn (gen_movdi (out, in));
29740 else if (INTVAL (amount) >= 64)
29741 {
29742 if (code == ASHIFTRT)
29743 {
29744 rtx const31_rtx = GEN_INT (31);
29745 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29746 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29747 }
29748 else
29749 emit_insn (gen_movdi (out, const0_rtx));
29750 }
29751
29752 /* Now handle valid shifts. */
29753 else if (INTVAL (amount) < 32)
29754 {
29755 /* Shifts by a constant less than 32. */
29756 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29757
29758 /* Clearing the out register in DImode first avoids lots
29759 of spilling and results in less stack usage.
29760 Later this redundant insn is completely removed.
29761 Do that only if "in" and "out" are different registers. */
29762 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29763 emit_insn (SET (out, const0_rtx));
29764 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29765 emit_insn (SET (out_down,
29766 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29767 out_down)));
29768 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29769 }
29770 else
29771 {
29772 /* Shifts by a constant greater than 31. */
29773 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29774
29775 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29776 emit_insn (SET (out, const0_rtx));
29777 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29778 if (code == ASHIFTRT)
29779 emit_insn (gen_ashrsi3 (out_up, in_up,
29780 GEN_INT (31)));
29781 else
29782 emit_insn (SET (out_up, const0_rtx));
29783 }
29784 }
29785 else
29786 {
29787 /* We have a shift-by-register. */
29788 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29789
29790 /* This alternative requires the scratch registers. */
29791 gcc_assert (scratch1 && REG_P (scratch1));
29792 gcc_assert (scratch2 && REG_P (scratch2));
29793
29794 /* We will need the values "amount-32" and "32-amount" later.
29795 Swapping them around now allows the later code to be more general. */
29796 switch (code)
29797 {
29798 case ASHIFT:
29799 emit_insn (SUB_32 (scratch1, amount));
29800 emit_insn (RSB_32 (scratch2, amount));
29801 break;
29802 case ASHIFTRT:
29803 emit_insn (RSB_32 (scratch1, amount));
29804 /* Also set CC = amount > 32. */
29805 emit_insn (SUB_S_32 (scratch2, amount));
29806 break;
29807 case LSHIFTRT:
29808 emit_insn (RSB_32 (scratch1, amount));
29809 emit_insn (SUB_32 (scratch2, amount));
29810 break;
29811 default:
29812 gcc_unreachable ();
29813 }
29814
29815 /* Emit code like this:
29816
29817 arithmetic-left:
29818 out_down = in_down << amount;
29819 out_down = (in_up << (amount - 32)) | out_down;
29820 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29821 out_up = in_up << amount;
29822
29823 arithmetic-right:
29824 out_down = in_down >> amount;
29825 out_down = (in_up << (32 - amount)) | out_down;
29826 if (amount < 32)
29827 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29828 out_up = in_up << amount;
29829
29830 logical-right:
29831 out_down = in_down >> amount;
29832 out_down = (in_up << (32 - amount)) | out_down;
29833 if (amount < 32)
29834 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29835 out_up = in_up << amount;
29836
29837 The ARM and Thumb2 variants are the same but implemented slightly
29838 differently. If this were only called during expand we could just
29839 use the Thumb2 case and let combine do the right thing, but this
29840 can also be called from post-reload splitters. */
29841
29842 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29843
29844 if (!TARGET_THUMB2)
29845 {
29846 /* Emit code for ARM mode. */
29847 emit_insn (SET (out_down,
29848 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29849 if (code == ASHIFTRT)
29850 {
29851 rtx_code_label *done_label = gen_label_rtx ();
29852 emit_jump_insn (BRANCH (LT, done_label));
29853 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29854 out_down)));
29855 emit_label (done_label);
29856 }
29857 else
29858 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29859 out_down)));
29860 }
29861 else
29862 {
29863 /* Emit code for Thumb2 mode.
29864 Thumb2 can't do shift and or in one insn. */
29865 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29866 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29867
29868 if (code == ASHIFTRT)
29869 {
29870 rtx_code_label *done_label = gen_label_rtx ();
29871 emit_jump_insn (BRANCH (LT, done_label));
29872 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29873 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29874 emit_label (done_label);
29875 }
29876 else
29877 {
29878 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29879 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29880 }
29881 }
29882
29883 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29884 }
29885
29886 #undef SUB_32
29887 #undef RSB_32
29888 #undef SUB_S_32
29889 #undef SET
29890 #undef SHIFT
29891 #undef LSHIFT
29892 #undef REV_LSHIFT
29893 #undef ORR
29894 #undef BRANCH
29895 }
29896
29897 /* Returns true if the pattern is a valid symbolic address, which is either a
29898 symbol_ref or (symbol_ref + addend).
29899
29900 According to the ARM ELF ABI, the initial addend of REL-type relocations
29901 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29902 literal field of the instruction as a 16-bit signed value in the range
29903 -32768 <= A < 32768. */
29904
29905 bool
29906 arm_valid_symbolic_address_p (rtx addr)
29907 {
29908 rtx xop0, xop1 = NULL_RTX;
29909 rtx tmp = addr;
29910
29911 if (target_word_relocations)
29912 return false;
29913
29914 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29915 return true;
29916
29917 /* (const (plus: symbol_ref const_int)) */
29918 if (GET_CODE (addr) == CONST)
29919 tmp = XEXP (addr, 0);
29920
29921 if (GET_CODE (tmp) == PLUS)
29922 {
29923 xop0 = XEXP (tmp, 0);
29924 xop1 = XEXP (tmp, 1);
29925
29926 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29927 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29928 }
29929
29930 return false;
29931 }
29932
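/* For example, (const (plus (symbol_ref "sym") (const_int 4))) is accepted,
   while an addend of 0x10000 is rejected because it cannot be encoded in
   the 16-bit signed literal field of a MOVW/MOVT REL relocation.  */
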
29933 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29934 the operands into a form that is valid for it. */
29935 bool
29936 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29937 {
29938 enum rtx_code code = GET_CODE (*comparison);
29939 int code_int;
29940 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29941 ? GET_MODE (*op2) : GET_MODE (*op1);
29942
29943 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29944
29945 if (code == UNEQ || code == LTGT)
29946 return false;
29947
29948 code_int = (int)code;
29949 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29950 PUT_CODE (*comparison, (enum rtx_code)code_int);
29951
29952 switch (mode)
29953 {
29954 case E_SImode:
29955 if (!arm_add_operand (*op1, mode))
29956 *op1 = force_reg (mode, *op1);
29957 if (!arm_add_operand (*op2, mode))
29958 *op2 = force_reg (mode, *op2);
29959 return true;
29960
29961 case E_DImode:
29962 if (!cmpdi_operand (*op1, mode))
29963 *op1 = force_reg (mode, *op1);
29964 if (!cmpdi_operand (*op2, mode))
29965 *op2 = force_reg (mode, *op2);
29966 return true;
29967
29968 case E_HFmode:
29969 if (!TARGET_VFP_FP16INST)
29970 break;
29971 /* FP16 comparisons are done in SF mode. */
29972 mode = SFmode;
29973 *op1 = convert_to_mode (mode, *op1, 1);
29974 *op2 = convert_to_mode (mode, *op2, 1);
29975 /* Fall through. */
29976 case E_SFmode:
29977 case E_DFmode:
29978 if (!vfp_compare_operand (*op1, mode))
29979 *op1 = force_reg (mode, *op1);
29980 if (!vfp_compare_operand (*op2, mode))
29981 *op2 = force_reg (mode, *op2);
29982 return true;
29983 default:
29984 break;
29985 }
29986
29987 return false;
29988
29989 }
29990
29991 /* Maximum number of instructions to use when setting a block of memory. */
29992 static int
29993 arm_block_set_max_insns (void)
29994 {
29995 if (optimize_function_for_size_p (cfun))
29996 return 4;
29997 else
29998 return current_tune->max_insns_inline_memset;
29999 }
30000
30001 /* Return TRUE if it's profitable to set a block of memory in the
30002 non-vectorized case. VAL is the value to set the memory
30003 with. LENGTH is the number of bytes to set. ALIGN is the
30004 alignment of the destination memory in bytes. UNALIGNED_P
30005 is TRUE if we can only set the memory with instructions that
30006 meet the alignment requirements. USE_STRD_P is TRUE if we
30007 can use strd to set the memory. */
30008 static bool
30009 arm_block_set_non_vect_profit_p (rtx val,
30010 unsigned HOST_WIDE_INT length,
30011 unsigned HOST_WIDE_INT align,
30012 bool unaligned_p, bool use_strd_p)
30013 {
30014 int num = 0;
30015 /* For a leftover of 0-7 bytes, the minimum number of strb/strh/str
30016 instructions needed to store it. */
30017 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
30018
30019 if (unaligned_p)
30020 {
30021 num = arm_const_inline_cost (SET, val);
30022 num += length / align + length % align;
30023 }
30024 else if (use_strd_p)
30025 {
30026 num = arm_const_double_inline_cost (val);
30027 num += (length >> 3) + leftover[length & 7];
30028 }
30029 else
30030 {
30031 num = arm_const_inline_cost (SET, val);
30032 num += (length >> 2) + leftover[length & 3];
30033 }
30034
30035 /* We may be able to combine the last STRH/STRB pair into a single STR
30036 by shifting it one byte back. */
30037 if (unaligned_access && length > 3 && (length & 3) == 3)
30038 num--;
30039
30040 return (num <= arm_block_set_max_insns ());
30041 }
30042
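/* For example, storing 15 bytes to a word-aligned destination on the
   non-STRD path costs the constant-load instructions plus 15 >> 2 == 3 word
   stores and leftover[3] == 2 narrow stores; when unaligned access is
   available, the trailing STRH/STRB pair can be merged into one STR, so one
   instruction is subtracted before comparing against
   arm_block_set_max_insns ().  */
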
30043 /* Return TRUE if it's profitable to set block of memory for
30044 vectorized case. LENGTH is the number of bytes to set.
30045 ALIGN is the alignment of destination memory in bytes.
30046 MODE is the vector mode used to set the memory. */
30047 static bool
30048 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30049 unsigned HOST_WIDE_INT align,
30050 machine_mode mode)
30051 {
30052 int num;
30053 bool unaligned_p = ((align & 3) != 0);
30054 unsigned int nelt = GET_MODE_NUNITS (mode);
30055
30056 /* Instruction loading constant value. */
30057 num = 1;
30058 /* Instructions storing the memory. */
30059 num += (length + nelt - 1) / nelt;
30060 /* Instructions adjusting the address expression. We only need to
30061 adjust the address expression if it's 4-byte aligned and the leftover
30062 bytes can only be stored by a misaligned store instruction. */
30063 if (!unaligned_p && (length & 3) != 0)
30064 num++;
30065
30066 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30067 if (!unaligned_p && mode == V16QImode)
30068 num--;
30069
30070 return (num <= arm_block_set_max_insns ());
30071 }
30072
30073 /* Set a block of memory using vectorization instructions for the
30074 unaligned case. We fill the first LENGTH bytes of the memory
30075 area starting from DSTBASE with byte constant VALUE. ALIGN is
30076 the alignment requirement of memory. Return TRUE if succeeded. */
30077 static bool
30078 arm_block_set_unaligned_vect (rtx dstbase,
30079 unsigned HOST_WIDE_INT length,
30080 unsigned HOST_WIDE_INT value,
30081 unsigned HOST_WIDE_INT align)
30082 {
30083 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30084 rtx dst, mem;
30085 rtx val_vec, reg;
30086 rtx (*gen_func) (rtx, rtx);
30087 machine_mode mode;
30088 unsigned HOST_WIDE_INT v = value;
30089 unsigned int offset = 0;
30090 gcc_assert ((align & 0x3) != 0);
30091 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30092 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30093 if (length >= nelt_v16)
30094 {
30095 mode = V16QImode;
30096 gen_func = gen_movmisalignv16qi;
30097 }
30098 else
30099 {
30100 mode = V8QImode;
30101 gen_func = gen_movmisalignv8qi;
30102 }
30103 nelt_mode = GET_MODE_NUNITS (mode);
30104 gcc_assert (length >= nelt_mode);
30105 /* Skip if it isn't profitable. */
30106 if (!arm_block_set_vect_profit_p (length, align, mode))
30107 return false;
30108
30109 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30110 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30111
30112 v = sext_hwi (v, BITS_PER_WORD);
30113
30114 reg = gen_reg_rtx (mode);
30115 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30116 /* Emit instruction loading the constant value. */
30117 emit_move_insn (reg, val_vec);
30118
30119 /* Handle nelt_mode bytes in a vector. */
30120 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30121 {
30122 emit_insn ((*gen_func) (mem, reg));
30123 if (i + 2 * nelt_mode <= length)
30124 {
30125 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30126 offset += nelt_mode;
30127 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30128 }
30129 }
30130
30131 /* If at least nelt_v8 bytes are left over, we must be in
30132 V16QImode. */
30133 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30134
30135 /* Handle (8, 16) bytes leftover. */
30136 if (i + nelt_v8 < length)
30137 {
30138 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30139 offset += length - i;
30140 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30141
30142 /* We are shifting bytes back, set the alignment accordingly. */
30143 if ((length & 1) != 0 && align >= 2)
30144 set_mem_align (mem, BITS_PER_UNIT);
30145
30146 emit_insn (gen_movmisalignv16qi (mem, reg));
30147 }
30148 /* Handle (0, 8] bytes leftover. */
30149 else if (i < length && i + nelt_v8 >= length)
30150 {
30151 if (mode == V16QImode)
30152 reg = gen_lowpart (V8QImode, reg);
30153
30154 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30155 + (nelt_mode - nelt_v8))));
30156 offset += (length - i) + (nelt_mode - nelt_v8);
30157 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30158
30159 /* We are shifting bytes back, set the alignment accordingly. */
30160 if ((length & 1) != 0 && align >= 2)
30161 set_mem_align (mem, BITS_PER_UNIT);
30162
30163 emit_insn (gen_movmisalignv8qi (mem, reg));
30164 }
30165
30166 return true;
30167 }
30168
30169 /* Set a block of memory using vectorization instructions for the
30170 aligned case. We fill the first LENGTH bytes of the memory area
30171 starting from DSTBASE with byte constant VALUE. ALIGN is the
30172 alignment requirement of memory. Return TRUE if succeeded. */
30173 static bool
30174 arm_block_set_aligned_vect (rtx dstbase,
30175 unsigned HOST_WIDE_INT length,
30176 unsigned HOST_WIDE_INT value,
30177 unsigned HOST_WIDE_INT align)
30178 {
30179 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30180 rtx dst, addr, mem;
30181 rtx val_vec, reg;
30182 machine_mode mode;
30183 unsigned int offset = 0;
30184
30185 gcc_assert ((align & 0x3) == 0);
30186 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30187 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30188 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30189 mode = V16QImode;
30190 else
30191 mode = V8QImode;
30192
30193 nelt_mode = GET_MODE_NUNITS (mode);
30194 gcc_assert (length >= nelt_mode);
30195 /* Skip if it isn't profitable. */
30196 if (!arm_block_set_vect_profit_p (length, align, mode))
30197 return false;
30198
30199 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30200
30201 reg = gen_reg_rtx (mode);
30202 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30203 /* Emit instruction loading the constant value. */
30204 emit_move_insn (reg, val_vec);
30205
30206 i = 0;
30207 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30208 if (mode == V16QImode)
30209 {
30210 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30211 emit_insn (gen_movmisalignv16qi (mem, reg));
30212 i += nelt_mode;
30213 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30214 if (i + nelt_v8 < length && i + nelt_v16 > length)
30215 {
30216 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30217 offset += length - nelt_mode;
30218 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30219 /* We are shifting bytes back, set the alignment accordingly. */
30220 if ((length & 0x3) == 0)
30221 set_mem_align (mem, BITS_PER_UNIT * 4);
30222 else if ((length & 0x1) == 0)
30223 set_mem_align (mem, BITS_PER_UNIT * 2);
30224 else
30225 set_mem_align (mem, BITS_PER_UNIT);
30226
30227 emit_insn (gen_movmisalignv16qi (mem, reg));
30228 return true;
30229 }
30230 /* Fall through for bytes leftover. */
30231 mode = V8QImode;
30232 nelt_mode = GET_MODE_NUNITS (mode);
30233 reg = gen_lowpart (V8QImode, reg);
30234 }
30235
30236 /* Handle 8 bytes in a vector. */
30237 for (; (i + nelt_mode <= length); i += nelt_mode)
30238 {
30239 addr = plus_constant (Pmode, dst, i);
30240 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30241 emit_move_insn (mem, reg);
30242 }
30243
30244 /* Handle single word leftover by shifting 4 bytes back. We can
30245 use aligned access for this case. */
30246 if (i + UNITS_PER_WORD == length)
30247 {
30248 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30249 offset += i - UNITS_PER_WORD;
30250 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30251 /* We are shifting 4 bytes back, set the alignment accordingly. */
30252 if (align > UNITS_PER_WORD)
30253 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30254
30255 emit_move_insn (mem, reg);
30256 }
30257 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30258 We have to use unaligned access for this case. */
30259 else if (i < length)
30260 {
30261 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30262 offset += length - nelt_mode;
30263 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30264 /* We are shifting bytes back, set the alignment accordingly. */
30265 if ((length & 1) == 0)
30266 set_mem_align (mem, BITS_PER_UNIT * 2);
30267 else
30268 set_mem_align (mem, BITS_PER_UNIT);
30269
30270 emit_insn (gen_movmisalignv8qi (mem, reg));
30271 }
30272
30273 return true;
30274 }
30275
30276 /* Set a block of memory using plain strh/strb instructions, using
30277 only instructions allowed by ALIGN on the processor. We fill the
30278 first LENGTH bytes of the memory area starting from DSTBASE
30279 with byte constant VALUE. ALIGN is the alignment requirement
30280 of the memory. */
30281 static bool
30282 arm_block_set_unaligned_non_vect (rtx dstbase,
30283 unsigned HOST_WIDE_INT length,
30284 unsigned HOST_WIDE_INT value,
30285 unsigned HOST_WIDE_INT align)
30286 {
30287 unsigned int i;
30288 rtx dst, addr, mem;
30289 rtx val_exp, val_reg, reg;
30290 machine_mode mode;
30291 HOST_WIDE_INT v = value;
30292
30293 gcc_assert (align == 1 || align == 2);
30294
30295 if (align == 2)
30296 v |= (value << BITS_PER_UNIT);
30297
30298 v = sext_hwi (v, BITS_PER_WORD);
30299 val_exp = GEN_INT (v);
30300 /* Skip if it isn't profitable. */
30301 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30302 align, true, false))
30303 return false;
30304
30305 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30306 mode = (align == 2 ? HImode : QImode);
30307 val_reg = force_reg (SImode, val_exp);
30308 reg = gen_lowpart (mode, val_reg);
30309
30310 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30311 {
30312 addr = plus_constant (Pmode, dst, i);
30313 mem = adjust_automodify_address (dstbase, mode, addr, i);
30314 emit_move_insn (mem, reg);
30315 }
30316
30317 /* Handle single byte leftover. */
30318 if (i + 1 == length)
30319 {
30320 reg = gen_lowpart (QImode, val_reg);
30321 addr = plus_constant (Pmode, dst, i);
30322 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30323 emit_move_insn (mem, reg);
30324 i++;
30325 }
30326
30327 gcc_assert (i == length);
30328 return true;
30329 }
30330
30331 /* Set a block of memory using plain strd/str/strh/strb instructions,
30332 to permit unaligned copies on processors which support unaligned
30333 semantics for those instructions. We fill the first LENGTH bytes
30334 of the memory area starting from DSTBASE with byte constant VALUE.
30335 ALIGN is the alignment requirement of memory. */
30336 static bool
30337 arm_block_set_aligned_non_vect (rtx dstbase,
30338 unsigned HOST_WIDE_INT length,
30339 unsigned HOST_WIDE_INT value,
30340 unsigned HOST_WIDE_INT align)
30341 {
30342 unsigned int i;
30343 rtx dst, addr, mem;
30344 rtx val_exp, val_reg, reg;
30345 unsigned HOST_WIDE_INT v;
30346 bool use_strd_p;
30347
30348 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30349 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30350
30351 v = (value | (value << 8) | (value << 16) | (value << 24));
30352 if (length < UNITS_PER_WORD)
30353 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30354
30355 if (use_strd_p)
30356 v |= (v << BITS_PER_WORD);
30357 else
30358 v = sext_hwi (v, BITS_PER_WORD);
30359
30360 val_exp = GEN_INT (v);
30361 /* Skip if it isn't profitable. */
30362 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30363 align, false, use_strd_p))
30364 {
30365 if (!use_strd_p)
30366 return false;
30367
30368 /* Try without strd. */
30369 v = (v >> BITS_PER_WORD);
30370 v = sext_hwi (v, BITS_PER_WORD);
30371 val_exp = GEN_INT (v);
30372 use_strd_p = false;
30373 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30374 align, false, use_strd_p))
30375 return false;
30376 }
30377
30378 i = 0;
30379 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30380 /* Handle double words using strd if possible. */
30381 if (use_strd_p)
30382 {
30383 val_reg = force_reg (DImode, val_exp);
30384 reg = val_reg;
30385 for (; (i + 8 <= length); i += 8)
30386 {
30387 addr = plus_constant (Pmode, dst, i);
30388 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30389 emit_move_insn (mem, reg);
30390 }
30391 }
30392 else
30393 val_reg = force_reg (SImode, val_exp);
30394
30395 /* Handle words. */
30396 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30397 for (; (i + 4 <= length); i += 4)
30398 {
30399 addr = plus_constant (Pmode, dst, i);
30400 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30401 if ((align & 3) == 0)
30402 emit_move_insn (mem, reg);
30403 else
30404 emit_insn (gen_unaligned_storesi (mem, reg));
30405 }
30406
30407 /* Merge last pair of STRH and STRB into a STR if possible. */
30408 if (unaligned_access && i > 0 && (i + 3) == length)
30409 {
30410 addr = plus_constant (Pmode, dst, i - 1);
30411 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30412 /* We are shifting one byte back, set the alignment accordingly. */
30413 if ((align & 1) == 0)
30414 set_mem_align (mem, BITS_PER_UNIT);
30415
30416 /* Most likely this is an unaligned access, and we can't tell at
30417 compilation time. */
30418 emit_insn (gen_unaligned_storesi (mem, reg));
30419 return true;
30420 }
30421
30422 /* Handle half word leftover. */
30423 if (i + 2 <= length)
30424 {
30425 reg = gen_lowpart (HImode, val_reg);
30426 addr = plus_constant (Pmode, dst, i);
30427 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30428 if ((align & 1) == 0)
30429 emit_move_insn (mem, reg);
30430 else
30431 emit_insn (gen_unaligned_storehi (mem, reg));
30432
30433 i += 2;
30434 }
30435
30436 /* Handle single byte leftover. */
30437 if (i + 1 == length)
30438 {
30439 reg = gen_lowpart (QImode, val_reg);
30440 addr = plus_constant (Pmode, dst, i);
30441 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30442 emit_move_insn (mem, reg);
30443 }
30444
30445 return true;
30446 }
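
/* A worked example of the splitting above (illustrative only, assuming
   the profitability check passes): for LENGTH == 7, VALUE == 0xAB and a
   word-aligned destination, the byte is replicated to 0xABABABAB and,
   without unaligned access, the block is written as

	str	rV, [rD]	@ bytes 0-3
	strh	rV, [rD, #4]	@ bytes 4-5
	strb	rV, [rD, #6]	@ byte 6

   whereas with unaligned access enabled the trailing STRH/STRB pair is
   merged into a single overlapping STR at [rD, #3].  rV and rD stand for
   whichever registers hold the replicated value and the destination
   address.  */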
30447
30448 /* Set a block of memory using vectorization instructions for both
30449 aligned and unaligned cases. We fill the first LENGTH bytes of
30450 the memory area starting from DSTBASE with byte constant VALUE.
30451 ALIGN is the alignment requirement of memory. */
30452 static bool
30453 arm_block_set_vect (rtx dstbase,
30454 unsigned HOST_WIDE_INT length,
30455 unsigned HOST_WIDE_INT value,
30456 unsigned HOST_WIDE_INT align)
30457 {
30458 /* Check whether we need to use unaligned store instruction. */
30459 if (((align & 3) != 0 || (length & 3) != 0)
30460 /* Check whether unaligned store instruction is available. */
30461 && (!unaligned_access || BYTES_BIG_ENDIAN))
30462 return false;
30463
30464 if ((align & 3) == 0)
30465 return arm_block_set_aligned_vect (dstbase, length, value, align);
30466 else
30467 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30468 }
30469
30470 /* Expand a memory set (setmem) operation. First we try to do that using
30471 vectorization instructions, then with ARM unaligned access and
30472 double-word stores if profitable. OPERANDS[0] is the destination,
30473 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
30474 initialize the memory, OPERANDS[3] is the known alignment of the
30475 destination. */
30476 bool
30477 arm_gen_setmem (rtx *operands)
30478 {
30479 rtx dstbase = operands[0];
30480 unsigned HOST_WIDE_INT length;
30481 unsigned HOST_WIDE_INT value;
30482 unsigned HOST_WIDE_INT align;
30483
30484 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30485 return false;
30486
30487 length = UINTVAL (operands[1]);
30488 if (length > 64)
30489 return false;
30490
30491 value = (UINTVAL (operands[2]) & 0xFF);
30492 align = UINTVAL (operands[3]);
30493 if (TARGET_NEON && length >= 8
30494 && current_tune->string_ops_prefer_neon
30495 && arm_block_set_vect (dstbase, length, value, align))
30496 return true;
30497
30498 if (!unaligned_access && (align & 3) != 0)
30499 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30500
30501 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30502 }
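
/* For illustration, assuming the NEON path is not taken, the tuning does
   not prefer STRD and the profitability check passes: a 10-byte,
   word-aligned memset with value 0xAB goes through
   arm_block_set_aligned_non_vect and becomes two word stores of
   0xABABABAB followed by one halfword store of 0xABAB, covering bytes
   0-7 and 8-9 respectively.  */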
30503
30504
30505 static bool
30506 arm_macro_fusion_p (void)
30507 {
30508 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30509 }
30510
30511 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30512 for MOVW / MOVT macro fusion. */
30513
30514 static bool
30515 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30516 {
30517 /* We are trying to fuse
30518 movw imm / movt imm
30519 instructions as a group that gets scheduled together. */
30520
30521 rtx set_dest = SET_DEST (curr_set);
30522
30523 if (GET_MODE (set_dest) != SImode)
30524 return false;
30525
30526 /* We are trying to match:
30527 prev (movw) == (set (reg r0) (const_int imm16))
30528 curr (movt) == (set (zero_extract (reg r0)
30529 (const_int 16)
30530 (const_int 16))
30531 (const_int imm16_1))
30532 or
30533 prev (movw) == (set (reg r1)
30534 (high (symbol_ref ("SYM"))))
30535 curr (movt) == (set (reg r0)
30536 (lo_sum (reg r1)
30537 (symbol_ref ("SYM")))) */
30538
30539 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30540 {
30541 if (CONST_INT_P (SET_SRC (curr_set))
30542 && CONST_INT_P (SET_SRC (prev_set))
30543 && REG_P (XEXP (set_dest, 0))
30544 && REG_P (SET_DEST (prev_set))
30545 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30546 return true;
30547
30548 }
30549 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30550 && REG_P (SET_DEST (curr_set))
30551 && REG_P (SET_DEST (prev_set))
30552 && GET_CODE (SET_SRC (prev_set)) == HIGH
30553 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30554 return true;
30555
30556 return false;
30557 }
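
/* For reference, the symbol form of the pair matched above corresponds
   to the usual address-materialisation sequence

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   which the scheduler will then try to keep back to back.  */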
30558
30559 static bool
30560 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30561 {
30562 rtx prev_set = single_set (prev);
30563 rtx curr_set = single_set (curr);
30564
30565 if (!prev_set
30566 || !curr_set)
30567 return false;
30568
30569 if (any_condjump_p (curr))
30570 return false;
30571
30572 if (!arm_macro_fusion_p ())
30573 return false;
30574
30575 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30576 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30577 return true;
30578
30579 return false;
30580 }
30581
30582 /* Return true iff the instruction fusion described by OP is enabled. */
30583 bool
30584 arm_fusion_enabled_p (tune_params::fuse_ops op)
30585 {
30586 return current_tune->fusible_ops & op;
30587 }
30588
30589 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30590 scheduled for speculative execution. Reject the long-running division
30591 and square-root instructions. */
30592
30593 static bool
30594 arm_sched_can_speculate_insn (rtx_insn *insn)
30595 {
30596 switch (get_attr_type (insn))
30597 {
30598 case TYPE_SDIV:
30599 case TYPE_UDIV:
30600 case TYPE_FDIVS:
30601 case TYPE_FDIVD:
30602 case TYPE_FSQRTS:
30603 case TYPE_FSQRTD:
30604 case TYPE_NEON_FP_SQRT_S:
30605 case TYPE_NEON_FP_SQRT_D:
30606 case TYPE_NEON_FP_SQRT_S_Q:
30607 case TYPE_NEON_FP_SQRT_D_Q:
30608 case TYPE_NEON_FP_DIV_S:
30609 case TYPE_NEON_FP_DIV_D:
30610 case TYPE_NEON_FP_DIV_S_Q:
30611 case TYPE_NEON_FP_DIV_D_Q:
30612 return false;
30613 default:
30614 return true;
30615 }
30616 }
30617
30618 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30619
30620 static unsigned HOST_WIDE_INT
30621 arm_asan_shadow_offset (void)
30622 {
30623 return HOST_WIDE_INT_1U << 29;
30624 }
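
/* Sketch of the generic AddressSanitizer mapping this offset plugs into
   (not code from this file): an application address is translated to its
   shadow byte as

	shadow = (addr >> 3) + (1 << 29);

   i.e. a granularity of 8 application bytes per shadow byte with the
   shadow region based at 0x20000000.  */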
30625
30626
30627 /* This is a temporary fix for PR60655. Ideally we need
30628 to handle most of these cases in the generic part but
30629 currently we reject minus (..) (sym_ref). We try to
30630 ameliorate the case with minus (sym_ref1) (sym_ref2)
30631 where they are in the same section. */
30632
30633 static bool
30634 arm_const_not_ok_for_debug_p (rtx p)
30635 {
30636 tree decl_op0 = NULL;
30637 tree decl_op1 = NULL;
30638
30639 if (GET_CODE (p) == UNSPEC)
30640 return true;
30641 if (GET_CODE (p) == MINUS)
30642 {
30643 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30644 {
30645 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30646 if (decl_op1
30647 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30648 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30649 {
30650 if ((VAR_P (decl_op1)
30651 || TREE_CODE (decl_op1) == CONST_DECL)
30652 && (VAR_P (decl_op0)
30653 || TREE_CODE (decl_op0) == CONST_DECL))
30654 return (get_variable_section (decl_op1, false)
30655 != get_variable_section (decl_op0, false));
30656
30657 if (TREE_CODE (decl_op1) == LABEL_DECL
30658 && TREE_CODE (decl_op0) == LABEL_DECL)
30659 return (DECL_CONTEXT (decl_op1)
30660 != DECL_CONTEXT (decl_op0));
30661 }
30662
30663 return true;
30664 }
30665 }
30666
30667 return false;
30668 }
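
/* A hypothetical example of the case handled above: with

	static int a = 1;	(both placed in .data)
	static int b = 2;

   the difference (minus (symbol_ref "b") (symbol_ref "a")) is acceptable
   in debug expressions because both symbols live in the same section,
   while a difference of symbols from different sections is rejected.  */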
30669
30670 /* Return TRUE if X is a reference to a value in the constant pool. */
30671 extern bool
30672 arm_is_constant_pool_ref (rtx x)
30673 {
30674 return (MEM_P (x)
30675 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30676 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30677 }
30678
30679 /* Remember the last target of arm_set_current_function. */
30680 static GTY(()) tree arm_previous_fndecl;
30681
30682 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30683
30684 void
30685 save_restore_target_globals (tree new_tree)
30686 {
30687 /* If we have a previous state, use it. */
30688 if (TREE_TARGET_GLOBALS (new_tree))
30689 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30690 else if (new_tree == target_option_default_node)
30691 restore_target_globals (&default_target_globals);
30692 else
30693 {
30694 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30695 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30696 }
30697
30698 arm_option_params_internal ();
30699 }
30700
30701 /* Invalidate arm_previous_fndecl. */
30702
30703 void
30704 arm_reset_previous_fndecl (void)
30705 {
30706 arm_previous_fndecl = NULL_TREE;
30707 }
30708
30709 /* Establish appropriate back-end context for processing the function
30710 FNDECL. The argument might be NULL to indicate processing at top
30711 level, outside of any function scope. */
30712
30713 static void
30714 arm_set_current_function (tree fndecl)
30715 {
30716 if (!fndecl || fndecl == arm_previous_fndecl)
30717 return;
30718
30719 tree old_tree = (arm_previous_fndecl
30720 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30721 : NULL_TREE);
30722
30723 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30724
30725 /* If current function has no attributes but previous one did,
30726 use the default node. */
30727 if (! new_tree && old_tree)
30728 new_tree = target_option_default_node;
30729
30730 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30731 the default have been handled by save_restore_target_globals from
30732 arm_pragma_target_parse. */
30733 if (old_tree == new_tree)
30734 return;
30735
30736 arm_previous_fndecl = fndecl;
30737
30738 /* First set the target options. */
30739 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30740
30741 save_restore_target_globals (new_tree);
30742 }
30743
30744 /* Implement TARGET_OPTION_PRINT. */
30745
30746 static void
30747 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30748 {
30749 int flags = ptr->x_target_flags;
30750 const char *fpu_name;
30751
30752 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30753 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30754
30755 fprintf (file, "%*sselected isa %s\n", indent, "",
30756 TARGET_THUMB2_P (flags) ? "thumb2" :
30757 TARGET_THUMB_P (flags) ? "thumb1" :
30758 "arm");
30759
30760 if (ptr->x_arm_arch_string)
30761 fprintf (file, "%*sselected architecture %s\n", indent, "",
30762 ptr->x_arm_arch_string);
30763
30764 if (ptr->x_arm_cpu_string)
30765 fprintf (file, "%*sselected CPU %s\n", indent, "",
30766 ptr->x_arm_cpu_string);
30767
30768 if (ptr->x_arm_tune_string)
30769 fprintf (file, "%*sselected tune %s\n", indent, "",
30770 ptr->x_arm_tune_string);
30771
30772 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30773 }
30774
30775 /* Hook to determine if one function can safely inline another. */
30776
30777 static bool
30778 arm_can_inline_p (tree caller, tree callee)
30779 {
30780 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30781 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30782 bool can_inline = true;
30783
30784 struct cl_target_option *caller_opts
30785 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30786 : target_option_default_node);
30787
30788 struct cl_target_option *callee_opts
30789 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30790 : target_option_default_node);
30791
30792 if (callee_opts == caller_opts)
30793 return true;
30794
30795 /* Callee's ISA features should be a subset of the caller's. */
30796 struct arm_build_target caller_target;
30797 struct arm_build_target callee_target;
30798 caller_target.isa = sbitmap_alloc (isa_num_bits);
30799 callee_target.isa = sbitmap_alloc (isa_num_bits);
30800
30801 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30802 false);
30803 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30804 false);
30805 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30806 can_inline = false;
30807
30808 sbitmap_free (caller_target.isa);
30809 sbitmap_free (callee_target.isa);
30810
30811 /* OK to inline between different modes.
30812 Function with mode specific instructions, e.g using asm,
30813 must be explicitly protected with noinline. */
30814 return can_inline;
30815 }
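
/* As an illustrative example, a callee declared with
   __attribute__ ((target ("fpu=neon-vfpv4"))) is not inlined into a
   caller built without the NEON ISA bits, because the callee's feature
   set would not be a subset of the caller's.  */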
30816
30817 /* Hook to fix function's alignment affected by target attribute. */
30818
30819 static void
30820 arm_relayout_function (tree fndecl)
30821 {
30822 if (DECL_USER_ALIGN (fndecl))
30823 return;
30824
30825 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30826
30827 if (!callee_tree)
30828 callee_tree = target_option_default_node;
30829
30830 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30831 SET_DECL_ALIGN
30832 (fndecl,
30833 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30834 }
30835
30836 /* Inner function to process the attribute ((target (...))): take an argument
30837 and set the current options from that argument. If we have a list,
30838 recursively process each element of the list. */
30839
30840 static bool
30841 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30842 {
30843 if (TREE_CODE (args) == TREE_LIST)
30844 {
30845 bool ret = true;
30846
30847 for (; args; args = TREE_CHAIN (args))
30848 if (TREE_VALUE (args)
30849 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30850 ret = false;
30851 return ret;
30852 }
30853
30854 else if (TREE_CODE (args) != STRING_CST)
30855 {
30856 error ("attribute %<target%> argument not a string");
30857 return false;
30858 }
30859
30860 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30861 char *q;
30862
30863 while ((q = strtok (argstr, ",")) != NULL)
30864 {
30865 argstr = NULL;
30866 if (!strcmp (q, "thumb"))
30867 opts->x_target_flags |= MASK_THUMB;
30868
30869 else if (!strcmp (q, "arm"))
30870 opts->x_target_flags &= ~MASK_THUMB;
30871
30872 else if (!strcmp (q, "general-regs-only"))
30873 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
30874
30875 else if (!strncmp (q, "fpu=", 4))
30876 {
30877 int fpu_index;
30878 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
30879 &fpu_index, CL_TARGET))
30880 {
30881 error ("invalid fpu for target attribute or pragma %qs", q);
30882 return false;
30883 }
30884 if (fpu_index == TARGET_FPU_auto)
30885 {
30886 /* This doesn't really make sense until we support
30887 general dynamic selection of the architecture and all
30888 sub-features. */
30889 sorry ("auto fpu selection not currently permitted here");
30890 return false;
30891 }
30892 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30893 }
30894 else if (!strncmp (q, "arch=", 5))
30895 {
30896 char *arch = q + 5;
30897 const arch_option *arm_selected_arch
30898 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30899
30900 if (!arm_selected_arch)
30901 {
30902 error ("invalid architecture for target attribute or pragma %qs",
30903 q);
30904 return false;
30905 }
30906
30907 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30908 }
30909 else if (q[0] == '+')
30910 {
30911 opts->x_arm_arch_string
30912 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30913 }
30914 else
30915 {
30916 error ("unknown target attribute or pragma %qs", q);
30917 return false;
30918 }
30919 }
30920
30921 return true;
30922 }
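
/* Examples of attribute strings accepted by the parser above
   (illustrative declarations only):

	int fast_path (int) __attribute__ ((target ("arch=armv7-a,thumb")));
	int vfp_helper (int) __attribute__ ((target ("fpu=vfpv3-d16")));  */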
30923
30924 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30925
30926 tree
30927 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30928 struct gcc_options *opts_set)
30929 {
30930 struct cl_target_option cl_opts;
30931
30932 if (!arm_valid_target_attribute_rec (args, opts))
30933 return NULL_TREE;
30934
30935 cl_target_option_save (&cl_opts, opts);
30936 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30937 arm_option_check_internal (opts);
30938 /* Do any overrides, such as global options arch=xxx.
30939 We do this since arm_active_target was overridden. */
30940 arm_option_reconfigure_globals ();
30941 arm_options_perform_arch_sanity_checks ();
30942 arm_option_override_internal (opts, opts_set);
30943
30944 return build_target_option_node (opts);
30945 }
30946
30947 static void
30948 add_attribute (const char * mode, tree *attributes)
30949 {
30950 size_t len = strlen (mode);
30951 tree value = build_string (len, mode);
30952
30953 TREE_TYPE (value) = build_array_type (char_type_node,
30954 build_index_type (size_int (len)));
30955
30956 *attributes = tree_cons (get_identifier ("target"),
30957 build_tree_list (NULL_TREE, value),
30958 *attributes);
30959 }
30960
30961 /* For testing. Insert thumb or arm modes alternately on successive functions. */
30962
30963 static void
30964 arm_insert_attributes (tree fndecl, tree * attributes)
30965 {
30966 const char *mode;
30967
30968 if (! TARGET_FLIP_THUMB)
30969 return;
30970
30971 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30972 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30973 return;
30974
30975 /* Nested definitions must inherit mode. */
30976 if (current_function_decl)
30977 {
30978 mode = TARGET_THUMB ? "thumb" : "arm";
30979 add_attribute (mode, attributes);
30980 return;
30981 }
30982
30983 /* If there is already a setting don't change it. */
30984 if (lookup_attribute ("target", *attributes) != NULL)
30985 return;
30986
30987 mode = thumb_flipper ? "thumb" : "arm";
30988 add_attribute (mode, attributes);
30989
30990 thumb_flipper = !thumb_flipper;
30991 }
30992
30993 /* Hook to validate attribute((target("string"))). */
30994
30995 static bool
30996 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30997 tree args, int ARG_UNUSED (flags))
30998 {
30999 bool ret = true;
31000 struct gcc_options func_options;
31001 tree cur_tree, new_optimize;
31002 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31003
31004 /* Get the optimization options of the current function. */
31005 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31006
31007 /* If the function changed the optimization levels as well as setting target
31008 options, start with the optimizations specified. */
31009 if (!func_optimize)
31010 func_optimize = optimization_default_node;
31011
31012 /* Init func_options. */
31013 memset (&func_options, 0, sizeof (func_options));
31014 init_options_struct (&func_options, NULL);
31015 lang_hooks.init_options_struct (&func_options);
31016
31017 /* Initialize func_options to the defaults. */
31018 cl_optimization_restore (&func_options,
31019 TREE_OPTIMIZATION (func_optimize));
31020
31021 cl_target_option_restore (&func_options,
31022 TREE_TARGET_OPTION (target_option_default_node));
31023
31024 /* Set func_options flags with new target mode. */
31025 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
31026 &global_options_set);
31027
31028 if (cur_tree == NULL_TREE)
31029 ret = false;
31030
31031 new_optimize = build_optimization_node (&func_options);
31032
31033 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31034
31035 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31036
31037 finalize_options_struct (&func_options);
31038
31039 return ret;
31040 }
31041
31042 /* Match an ISA feature bitmap to a named FPU. We always use the
31043 first entry that exactly matches the feature set, so that we
31044 effectively canonicalize the FPU name for the assembler. */
31045 static const char*
31046 arm_identify_fpu_from_isa (sbitmap isa)
31047 {
31048 auto_sbitmap fpubits (isa_num_bits);
31049 auto_sbitmap cand_fpubits (isa_num_bits);
31050
31051 bitmap_and (fpubits, isa, isa_all_fpubits);
31052
31053 /* If there are no ISA feature bits relating to the FPU, we must be
31054 doing soft-float. */
31055 if (bitmap_empty_p (fpubits))
31056 return "softvfp";
31057
31058 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31059 {
31060 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31061 if (bitmap_equal_p (fpubits, cand_fpubits))
31062 return all_fpus[i].name;
31063 }
31064 /* We must find an entry, or things have gone wrong. */
31065 gcc_unreachable ();
31066 }
31067
31068 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31069 by the function FNDECL. */
31070 void
31071 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31072 {
31073 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31074
31075 struct cl_target_option *targ_options;
31076 if (target_parts)
31077 targ_options = TREE_TARGET_OPTION (target_parts);
31078 else
31079 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31080 gcc_assert (targ_options);
31081
31082 /* Only update the assembler .arch string if it is distinct from the last
31083 such string we printed. arch_to_print is set conditionally in case
31084 targ_options->x_arm_arch_string is NULL which can be the case
31085 when cc1 is invoked directly without passing -march option. */
31086 std::string arch_to_print;
31087 if (targ_options->x_arm_arch_string)
31088 arch_to_print = targ_options->x_arm_arch_string;
31089
31090 if (arch_to_print != arm_last_printed_arch_string)
31091 {
31092 std::string arch_name
31093 = arch_to_print.substr (0, arch_to_print.find ("+"));
31094 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31095 const arch_option *arch
31096 = arm_parse_arch_option_name (all_architectures, "-march",
31097 targ_options->x_arm_arch_string);
31098 auto_sbitmap opt_bits (isa_num_bits);
31099
31100 gcc_assert (arch);
31101 if (arch->common.extensions)
31102 {
31103 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31104 opt->name != NULL;
31105 opt++)
31106 {
31107 if (!opt->remove)
31108 {
31109 arm_initialize_isa (opt_bits, opt->isa_bits);
31110 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31111 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31112 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31113 opt->name);
31114 }
31115 }
31116 }
31117
31118 arm_last_printed_arch_string = arch_to_print;
31119 }
31120
31121 fprintf (stream, "\t.syntax unified\n");
31122
31123 if (TARGET_THUMB)
31124 {
31125 if (is_called_in_ARM_mode (decl)
31126 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31127 && cfun->is_thunk))
31128 fprintf (stream, "\t.code 32\n");
31129 else if (TARGET_THUMB1)
31130 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31131 else
31132 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31133 }
31134 else
31135 fprintf (stream, "\t.arm\n");
31136
31137 std::string fpu_to_print
31138 = TARGET_SOFT_FLOAT
31139 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31140
31141 if (fpu_to_print != arm_last_printed_fpu_string)
31142 {
31143 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31144 arm_last_printed_fpu_string = fpu_to_print;
31145 }
31146
31147 if (TARGET_POKE_FUNCTION_NAME)
31148 arm_poke_function_name (stream, (const char *) name);
31149 }
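
/* As an illustration, for a Thumb-2 function compiled for armv7-a with a
   vfpv3-d16 FPU the directives emitted above would typically be

	.arch armv7-a
	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   with .arch and .fpu omitted when they match what was last printed.  */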
31150
31151 /* If MEM is in the form of [base+offset], extract the two parts
31152 of the address and store them in BASE and OFFSET; otherwise return
31153 false after clearing BASE and OFFSET. */
31154
31155 static bool
31156 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31157 {
31158 rtx addr;
31159
31160 gcc_assert (MEM_P (mem));
31161
31162 addr = XEXP (mem, 0);
31163
31164 /* Strip off const from addresses like (const (addr)). */
31165 if (GET_CODE (addr) == CONST)
31166 addr = XEXP (addr, 0);
31167
31168 if (GET_CODE (addr) == REG)
31169 {
31170 *base = addr;
31171 *offset = const0_rtx;
31172 return true;
31173 }
31174
31175 if (GET_CODE (addr) == PLUS
31176 && GET_CODE (XEXP (addr, 0)) == REG
31177 && CONST_INT_P (XEXP (addr, 1)))
31178 {
31179 *base = XEXP (addr, 0);
31180 *offset = XEXP (addr, 1);
31181 return true;
31182 }
31183
31184 *base = NULL_RTX;
31185 *offset = NULL_RTX;
31186
31187 return false;
31188 }
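
/* E.g. a MEM whose address is (plus (reg r3) (const_int 8)) yields
   *BASE == (reg r3) and *OFFSET == (const_int 8), while a plain
   (reg r3) address yields *OFFSET == const0_rtx.  */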
31189
31190 /* If INSN is a load or store whose address has the form [base+offset],
31191 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
31192 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
31193 otherwise return FALSE. */
31194
31195 static bool
31196 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31197 {
31198 rtx x, dest, src;
31199
31200 gcc_assert (INSN_P (insn));
31201 x = PATTERN (insn);
31202 if (GET_CODE (x) != SET)
31203 return false;
31204
31205 src = SET_SRC (x);
31206 dest = SET_DEST (x);
31207 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31208 {
31209 *is_load = false;
31210 extract_base_offset_in_addr (dest, base, offset);
31211 }
31212 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31213 {
31214 *is_load = true;
31215 extract_base_offset_in_addr (src, base, offset);
31216 }
31217 else
31218 return false;
31219
31220 return (*base != NULL_RTX && *offset != NULL_RTX);
31221 }
31222
31223 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31224
31225 Currently we only support fusing ldr or str instructions, so FUSION_PRI
31226 and PRI are only calculated for these instructions. For other instructions,
31227 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
31228 of instruction fusion can be supported by returning different priorities.
31229
31230 It's important that irrelevant instructions get the largest FUSION_PRI. */
31231
31232 static void
31233 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31234 int *fusion_pri, int *pri)
31235 {
31236 int tmp, off_val;
31237 bool is_load;
31238 rtx base, offset;
31239
31240 gcc_assert (INSN_P (insn));
31241
31242 tmp = max_pri - 1;
31243 if (!fusion_load_store (insn, &base, &offset, &is_load))
31244 {
31245 *pri = tmp;
31246 *fusion_pri = tmp;
31247 return;
31248 }
31249
31250 /* Load goes first. */
31251 if (is_load)
31252 *fusion_pri = tmp - 1;
31253 else
31254 *fusion_pri = tmp - 2;
31255
31256 tmp /= 2;
31257
31258 /* INSN with smaller base register goes first. */
31259 tmp -= ((REGNO (base) & 0xff) << 20);
31260
31261 /* INSN with smaller offset goes first. */
31262 off_val = (int)(INTVAL (offset));
31263 if (off_val >= 0)
31264 tmp -= (off_val & 0xfffff);
31265 else
31266 tmp += ((- off_val) & 0xfffff);
31267
31268 *pri = tmp;
31269 return;
31270 }
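
/* Illustrative effect of the priorities above (assuming the scheduler
   honours them): for

	ldr	r0, [r3, #4]
	ldr	r1, [r3, #8]

   both insns get the load FUSION_PRI (MAX_PRI - 2) and the smaller
   offset gets the larger PRI, so the pair is kept adjacent and in offset
   order, which is what a later ldrd/strd peephole wants.  */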
31271
31272
31273 /* Construct and return a PARALLEL RTX vector with elements numbering the
31274 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31275 the vector - from the perspective of the architecture. This does not
31276 line up with GCC's perspective on lane numbers, so we end up with
31277 different masks depending on our target endian-ness. The diagram
31278 below may help. We must draw the distinction when building masks
31279 which select one half of the vector. An instruction selecting
31280 architectural low-lanes for a big-endian target, must be described using
31281 a mask selecting GCC high-lanes.
31282
31283 Big-Endian Little-Endian
31284
31285 GCC 0 1 2 3 3 2 1 0
31286 | x | x | x | x | | x | x | x | x |
31287 Architecture 3 2 1 0 3 2 1 0
31288
31289 Low Mask: { 2, 3 } { 0, 1 }
31290 High Mask: { 0, 1 } { 2, 3 }
31291 */
31292
31293 rtx
31294 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31295 {
31296 int nunits = GET_MODE_NUNITS (mode);
31297 rtvec v = rtvec_alloc (nunits / 2);
31298 int high_base = nunits / 2;
31299 int low_base = 0;
31300 int base;
31301 rtx t1;
31302 int i;
31303
31304 if (BYTES_BIG_ENDIAN)
31305 base = high ? low_base : high_base;
31306 else
31307 base = high ? high_base : low_base;
31308
31309 for (i = 0; i < nunits / 2; i++)
31310 RTVEC_ELT (v, i) = GEN_INT (base + i);
31311
31312 t1 = gen_rtx_PARALLEL (mode, v);
31313 return t1;
31314 }
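
/* E.g. for V4SImode this returns, with HIGH == TRUE,
   (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */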
31315
31316 /* Check OP for validity as a PARALLEL RTX vector with elements
31317 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31318 from the perspective of the architecture. See the diagram above
31319 arm_simd_vect_par_cnst_half for more details. */
31320
31321 bool
31322 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31323 bool high)
31324 {
31325 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31326 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31327 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31328 int i = 0;
31329
31330 if (!VECTOR_MODE_P (mode))
31331 return false;
31332
31333 if (count_op != count_ideal)
31334 return false;
31335
31336 for (i = 0; i < count_ideal; i++)
31337 {
31338 rtx elt_op = XVECEXP (op, 0, i);
31339 rtx elt_ideal = XVECEXP (ideal, 0, i);
31340
31341 if (!CONST_INT_P (elt_op)
31342 || INTVAL (elt_ideal) != INTVAL (elt_op))
31343 return false;
31344 }
31345 return true;
31346 }
31347
31348 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31349 in Thumb1. */
31350 static bool
31351 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31352 const_tree)
31353 {
31354 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31355 if (vcall_offset && TARGET_THUMB1)
31356 return false;
31357
31358 /* Otherwise ok. */
31359 return true;
31360 }
31361
31362 /* Generate RTL for a conditional branch with rtx comparison CODE in
31363 mode CC_MODE. The destination of the unlikely conditional branch
31364 is LABEL_REF. */
31365
31366 void
31367 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31368 rtx label_ref)
31369 {
31370 rtx x;
31371 x = gen_rtx_fmt_ee (code, VOIDmode,
31372 gen_rtx_REG (cc_mode, CC_REGNUM),
31373 const0_rtx);
31374
31375 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31376 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31377 pc_rtx);
31378 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31379 }
31380
31381 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31382
31383 For pure-code sections there is no letter code for this attribute, so
31384 output all the section flags numerically when this is needed. */
31385
31386 static bool
31387 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31388 {
31389
31390 if (flags & SECTION_ARM_PURECODE)
31391 {
31392 *num = 0x20000000;
31393
31394 if (!(flags & SECTION_DEBUG))
31395 *num |= 0x2;
31396 if (flags & SECTION_EXCLUDE)
31397 *num |= 0x80000000;
31398 if (flags & SECTION_WRITE)
31399 *num |= 0x1;
31400 if (flags & SECTION_CODE)
31401 *num |= 0x4;
31402 if (flags & SECTION_MERGE)
31403 *num |= 0x10;
31404 if (flags & SECTION_STRINGS)
31405 *num |= 0x20;
31406 if (flags & SECTION_TLS)
31407 *num |= 0x400;
31408 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31409 *num |= 0x200;
31410
31411 return true;
31412 }
31413
31414 return false;
31415 }
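
/* For instance, an allocatable executable pure-code section
   (SECTION_CODE set, SECTION_DEBUG clear) is given
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE |
   SHF_ALLOC | SHF_EXECINSTR in ELF terms.  */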
31416
31417 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31418
31419 If pure-code is passed as an option, make sure all functions are in
31420 sections that have the SHF_ARM_PURECODE attribute. */
31421
31422 static section *
31423 arm_function_section (tree decl, enum node_frequency freq,
31424 bool startup, bool exit)
31425 {
31426 const char * section_name;
31427 section * sec;
31428
31429 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31430 return default_function_section (decl, freq, startup, exit);
31431
31432 if (!target_pure_code)
31433 return default_function_section (decl, freq, startup, exit);
31434
31435
31436 section_name = DECL_SECTION_NAME (decl);
31437
31438 /* If a function is not in a named section then it falls under the 'default'
31439 text section, also known as '.text'. We can preserve previous behavior as
31440 the default text section already has the SHF_ARM_PURECODE section
31441 attribute. */
31442 if (!section_name)
31443 {
31444 section *default_sec = default_function_section (decl, freq, startup,
31445 exit);
31446
31447 /* If default_sec is not null, then it must be a special section like for
31448 example .text.startup. We set the pure-code attribute and return the
31449 same section to preserve existing behavior. */
31450 if (default_sec)
31451 default_sec->common.flags |= SECTION_ARM_PURECODE;
31452 return default_sec;
31453 }
31454
31455 /* Otherwise look whether a section has already been created with
31456 'section_name'. */
31457 sec = get_named_section (decl, section_name, 0);
31458 if (!sec)
31459 /* If that is not the case passing NULL as the section's name to
31460 'get_named_section' will create a section with the declaration's
31461 section name. */
31462 sec = get_named_section (decl, NULL, 0);
31463
31464 /* Set the SHF_ARM_PURECODE attribute. */
31465 sec->common.flags |= SECTION_ARM_PURECODE;
31466
31467 return sec;
31468 }
31469
31470 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31471
31472 If DECL is a function declaration and pure-code is passed as an option
31473 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31474 section's name and RELOC indicates whether the declaration's initializer may
31475 contain runtime relocations. */
31476
31477 static unsigned int
31478 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31479 {
31480 unsigned int flags = default_section_type_flags (decl, name, reloc);
31481
31482 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31483 flags |= SECTION_ARM_PURECODE;
31484
31485 return flags;
31486 }
31487
31488 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31489
31490 static void
31491 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31492 rtx op0, rtx op1,
31493 rtx *quot_p, rtx *rem_p)
31494 {
31495 if (mode == SImode)
31496 gcc_assert (!TARGET_IDIV);
31497
31498 scalar_int_mode libval_mode
31499 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31500
31501 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31502 libval_mode,
31503 op0, GET_MODE (op0),
31504 op1, GET_MODE (op1));
31505
31506 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31507 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31508 GET_MODE_SIZE (mode));
31509
31510 gcc_assert (quotient);
31511 gcc_assert (remainder);
31512
31513 *quot_p = quotient;
31514 *rem_p = remainder;
31515 }
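
/* Illustrative behaviour: for SImode operands the libcall is
   __aeabi_idivmod or __aeabi_uidivmod, which per the ARM run-time ABI
   returns the quotient in r0 and the remainder in r1; that register pair
   is the double-width LIBVAL above, so operands 7 and 3 produce
   quotient 2 and remainder 1.  */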
31516
31517 /* This function checks for the availability of the coprocessor builtin passed
31518 in BUILTIN for the current target. Returns true if it is available and
31519 false otherwise. If a BUILTIN is passed for which this function has not
31520 been implemented it will cause an exception. */
31521
31522 bool
31523 arm_coproc_builtin_available (enum unspecv builtin)
31524 {
31525 /* None of these builtins are available in Thumb mode if the target only
31526 supports Thumb-1. */
31527 if (TARGET_THUMB1)
31528 return false;
31529
31530 switch (builtin)
31531 {
31532 case VUNSPEC_CDP:
31533 case VUNSPEC_LDC:
31534 case VUNSPEC_LDCL:
31535 case VUNSPEC_STC:
31536 case VUNSPEC_STCL:
31537 case VUNSPEC_MCR:
31538 case VUNSPEC_MRC:
31539 if (arm_arch4)
31540 return true;
31541 break;
31542 case VUNSPEC_CDP2:
31543 case VUNSPEC_LDC2:
31544 case VUNSPEC_LDC2L:
31545 case VUNSPEC_STC2:
31546 case VUNSPEC_STC2L:
31547 case VUNSPEC_MCR2:
31548 case VUNSPEC_MRC2:
31549 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31550 ARMv8-{A,M}. */
31551 if (arm_arch5t)
31552 return true;
31553 break;
31554 case VUNSPEC_MCRR:
31555 case VUNSPEC_MRRC:
31556 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31557 ARMv8-{A,M}. */
31558 if (arm_arch6 || arm_arch5te)
31559 return true;
31560 break;
31561 case VUNSPEC_MCRR2:
31562 case VUNSPEC_MRRC2:
31563 if (arm_arch6)
31564 return true;
31565 break;
31566 default:
31567 gcc_unreachable ();
31568 }
31569 return false;
31570 }
31571
31572 /* This function returns true if OP is a valid memory operand for the ldc and
31573 stc coprocessor instructions and false otherwise. */
31574
31575 bool
31576 arm_coproc_ldc_stc_legitimate_address (rtx op)
31577 {
31578 HOST_WIDE_INT range;
31579 /* Has to be a memory operand. */
31580 if (!MEM_P (op))
31581 return false;
31582
31583 op = XEXP (op, 0);
31584
31585 /* We accept registers. */
31586 if (REG_P (op))
31587 return true;
31588
31589 switch (GET_CODE (op))
31590 {
31591 case PLUS:
31592 {
31593 /* Or registers with an offset. */
31594 if (!REG_P (XEXP (op, 0)))
31595 return false;
31596
31597 op = XEXP (op, 1);
31598
31599 /* The offset must be an immediate though. */
31600 if (!CONST_INT_P (op))
31601 return false;
31602
31603 range = INTVAL (op);
31604
31605 /* Within the range of [-1020,1020]. */
31606 if (!IN_RANGE (range, -1020, 1020))
31607 return false;
31608
31609 /* And a multiple of 4. */
31610 return (range % 4) == 0;
31611 }
31612 case PRE_INC:
31613 case POST_INC:
31614 case PRE_DEC:
31615 case POST_DEC:
31616 return REG_P (XEXP (op, 0));
31617 default:
31618 gcc_unreachable ();
31619 }
31620 return false;
31621 }
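
/* Accepted address forms, in assembly terms (illustrative):

	[rN]			plain base register
	[rN, #imm]		imm a multiple of 4 within [-1020, 1020]

   plus the pre/post increment and decrement forms handled by the switch
   above.  */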
31622
31623 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31624
31625 In VFPv1, VFP registers could only be accessed in the mode they were
31626 set, so subregs would be invalid there. However, we don't support
31627 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31628
31629 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31630 VFP registers in little-endian order. We can't describe that accurately to
31631 GCC, so avoid taking subregs of such values.
31632
31633 The only exception is going from a 128-bit to a 64-bit type. In that
31634 case the data layout happens to be consistent for big-endian, so we
31635 explicitly allow that case. */
31636
31637 static bool
31638 arm_can_change_mode_class (machine_mode from, machine_mode to,
31639 reg_class_t rclass)
31640 {
31641 if (TARGET_BIG_END
31642 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31643 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31644 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31645 && reg_classes_intersect_p (VFP_REGS, rclass))
31646 return false;
31647 return true;
31648 }
31649
31650 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31651 strcpy from constants will be faster. */
31652
31653 static HOST_WIDE_INT
31654 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31655 {
31656 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31657 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31658 return MAX (align, BITS_PER_WORD * factor);
31659 return align;
31660 }
31661
31662 /* Emit a speculation barrier on target architectures that do not have
31663 DSB/ISB directly. Such systems probably don't need a barrier
31664 themselves, but if the code is ever run on a later architecture, it
31665 might become a problem. */
31666 void
31667 arm_emit_speculation_barrier_function ()
31668 {
31669 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31670 }
31671
31672 #if CHECKING_P
31673 namespace selftest {
31674
31675 /* Scan the static data tables generated by parsecpu.awk looking for
31676 potential issues with the data. We primarily check for
31677 inconsistencies in the option extensions at present (extensions
31678 that duplicate others but aren't marked as aliases). Furthermore,
31679 for correct canonicalization later options must never be a subset
31680 of an earlier option. Any extension should also only specify other
31681 feature bits and never an architecture bit. The architecture is inferred
31682 from the declaration of the extension. */
31683 static void
31684 arm_test_cpu_arch_data (void)
31685 {
31686 const arch_option *arch;
31687 const cpu_option *cpu;
31688 auto_sbitmap target_isa (isa_num_bits);
31689 auto_sbitmap isa1 (isa_num_bits);
31690 auto_sbitmap isa2 (isa_num_bits);
31691
31692 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31693 {
31694 const cpu_arch_extension *ext1, *ext2;
31695
31696 if (arch->common.extensions == NULL)
31697 continue;
31698
31699 arm_initialize_isa (target_isa, arch->common.isa_bits);
31700
31701 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31702 {
31703 if (ext1->alias)
31704 continue;
31705
31706 arm_initialize_isa (isa1, ext1->isa_bits);
31707 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31708 {
31709 if (ext2->alias || ext1->remove != ext2->remove)
31710 continue;
31711
31712 arm_initialize_isa (isa2, ext2->isa_bits);
31713 /* If the option is a subset of the parent option, it doesn't
31714 add anything and so isn't useful. */
31715 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31716
31717 /* If the extension specifies any architectural bits then
31718 disallow it. Extensions should only specify feature bits. */
31719 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31720 }
31721 }
31722 }
31723
31724 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31725 {
31726 const cpu_arch_extension *ext1, *ext2;
31727
31728 if (cpu->common.extensions == NULL)
31729 continue;
31730
31731 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31732
31733 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31734 {
31735 if (ext1->alias)
31736 continue;
31737
31738 arm_initialize_isa (isa1, ext1->isa_bits);
31739 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31740 {
31741 if (ext2->alias || ext1->remove != ext2->remove)
31742 continue;
31743
31744 arm_initialize_isa (isa2, ext2->isa_bits);
31745 /* If the option is a subset of the parent option, it doesn't
31746 add anything and so isn't useful. */
31747 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31748
31749 /* If the extension specifies any architectural bits then
31750 disallow it. Extensions should only specify feature bits. */
31751 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31752 }
31753 }
31754 }
31755 }
31756
31757 /* Scan the static data tables generated by parsecpu.awk looking for
31758 potential issues with the data. Here we check for consistency between the
31759 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31760 a feature bit that is not defined by any FPU flag. */
31761 static void
31762 arm_test_fpu_data (void)
31763 {
31764 auto_sbitmap isa_all_fpubits (isa_num_bits);
31765 auto_sbitmap fpubits (isa_num_bits);
31766 auto_sbitmap tmpset (isa_num_bits);
31767
31768 static const enum isa_feature fpu_bitlist[]
31769 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31770 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31771
31772 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31773 {
31774 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31775 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31776 bitmap_clear (isa_all_fpubits);
31777 bitmap_copy (isa_all_fpubits, tmpset);
31778 }
31779
31780 if (!bitmap_empty_p (isa_all_fpubits))
31781 {
31782 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31783 " group that are not defined by any FPU.\n"
31784 " Check your arm-cpus.in.\n");
31785 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31786 }
31787 }
31788
31789 static void
31790 arm_run_selftests (void)
31791 {
31792 arm_test_cpu_arch_data ();
31793 arm_test_fpu_data ();
31794 }
31795 } /* Namespace selftest. */
31796
31797 #undef TARGET_RUN_TARGET_SELFTESTS
31798 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31799 #endif /* CHECKING_P */
31800
31801 struct gcc_target targetm = TARGET_INITIALIZER;
31802
31803 #include "gt-arm.h"