1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
     4    and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
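  /* Usage sketch (illustration only, not part of the original file):

       __declspec (dllimport) int imported_counter;
       __declspec (dllexport) void exported_fn (void);
       __declspec (dllexport) __declspec (interfacearm) void thunked (void);

     i.e. repeat __declspec for each attribute instead of space-separating
     several names inside a single __declspec, as noted above.  */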
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
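/* Usage sketch for the attributes above (illustration only, not part of the
   original file; all declarations are hypothetical user code):

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     int entry_fn (int) __attribute__ ((cmse_nonsecure_entry));
     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (void);

   The last two require -mcmse (ARMv8-M Security Extensions).  "pcs" takes
   exactly one string argument and "isr"/"interrupt" take at most one,
   matching the min_len/max_len fields in the table.  */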
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
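/* Worked out (not part of the original file): offsets -4088 .. 4095 span
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, hence the divisibility
   by eight mentioned above.  */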
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
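/* Illustration (not part of the original file): without -mrestrict-it one IT
   instruction may cover up to four conditional Thumb-2 insns, e.g.

       itte  eq
       moveq r0, #1
       addeq r1, r1, #2
       movne r0, #0

   whereas with -mrestrict-it each block covers a single insn ("it eq"
   followed by one eq-conditional instruction).  */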
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
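/* Illustration (assumption drawn from the comment above, not part of the
   original file): the value 2 selects bit 1 as the marker, i.e. a pointer
   with bit 1 set is treated as a descriptor, leaving bit 0 free for ARM
   versus Thumb state selection.  */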
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
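/* Illustration (not part of the original file): a branch over at most this
   many instructions, e.g.

       cmp   r0, #0
       beq   .Lskip          @ skips two insns
       add   r1, r1, #1
       add   r2, r2, #1
     .Lskip:

   may instead be emitted as the conditional pair "addne r1, r1, #1" /
   "addne r2, r2, #1", removing the branch.  */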
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
   917 /* Nonzero if tuning for XScale.  */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
   947 /* Nonzero if we should use Neon to handle 64-bit operations rather
948 than core registers. */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
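/* Note (not part of the original file): the entries are ordered in inverse
   pairs ("eq"/"ne", "cs"/"cc", ..., "gt"/"le"), so flipping the low bit of
   an index yields the inverse condition code.  */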
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
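/* Reading of the mask above (not part of the original file): 0xff selects
   the low registers r0-r7, from which the Thumb hard frame pointer and, if
   it happens to be a low register, the PIC register are removed; SP and PC
   lie outside the low-register range, so clearing their bits is purely
   defensive.  */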
1005 \f
1006 /* Initialization code. */
1007
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
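/* Usage sketch (assumption, not part of the original file): tune_params
   structures later in the file initialise their prefetch fields with these
   macros, e.g. ARM_PREFETCH_NOT_BENEFICIAL for cores that do not profit
   from issuing prefetches, or ARM_PREFETCH_BENEFICIAL (4, 32768, 64) for a
   core with 4 outstanding prefetch slots, a 32kB L1 cache and 64-byte
   cache lines (illustrative numbers).  */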
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
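/* Worked example (not part of the original file): under these unit costs a
   vectorised block of four statements followed by a taken branch is costed
   at 4 * 1 + 3 = 7, while the same block ending in a not-taken branch costs
   4 * 1 + 1 = 5; all other entries weight vector and scalar work equally.  */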
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
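/* Note (not part of the original file): COSTS_N_INSNS (N) expresses a cost
   of N instruction equivalents, so e.g. COSTS_N_INSNS (3) below for a
   Cortex-A9 SImode multiply rates it as costly as three simple ALU insns.  */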
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
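/* Illustrative sketch only: how one entry of a cpu_cost_table like those
   above might be consulted when costing an RTX.  The member names used here
   (mult, simple) are assumed from the initializer comments above rather than
   quoted from the real structure definition; COSTS_N_INSNS (1) is the
   baseline cost of a single instruction.  */
static int ATTRIBUTE_UNUSED
example_simode_mult_cost (const struct cpu_cost_table *extra)
{
  /* The MULT sub-tables are laid out SImode first, then DImode, so index 0
     selects the SImode entry.  */
  return COSTS_N_INSNS (1) + extra->mult[0].simple;
}
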
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a
2260 single cycle, so materialising a 32-bit constant that way costs two cycles.
2261 An LDR from the constant pool likewise takes two cycles to execute, but
2262 mildly increases pipelining opportunity (consecutive loads/stores can be
2263 pipelined together, saving one cycle), and may also improve icache
2264 utilisation. Hence we prefer the constant pool for such processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
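/* Illustrative sketch only: the tuning structures above are consulted at run
   time through a pointer to the selected tune_params (see arm_constant_limit
   and arm_option_params_internal further down, which read constant_limit and
   max_insns_skipped).  A typical query might look like this:  */
static bool ATTRIBUTE_UNUSED
example_can_conditionalise (const struct tune_params *tune, int ninsns)
{
  /* Allow conditional execution of a block only if it is no longer than the
     per-CPU maximum number of insns worth skipping.  */
  return ninsns <= tune->max_insns_skipped;
}
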
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
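
/* Worked example of the loop above: for VALUE == 0x29 (binary 101001) the
   three iterations clear bits 0, 3 and 5 in turn, so bit_count returns 3.  */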
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point library functions (libfuncs). */
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
2448
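/* Worked example (illustrative only): the loop further down calls
   arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3),
   and the sprintf above turns that into the libfunc name "__gnu_ssaddsa3".
   Passing 0 for NUM_SUFFIX simply omits the trailing digit.  */
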
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
2468
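/* Worked example (illustrative only): a conversion between two signed fract
   modes, say from SQmode ("sq") to DQmode ("dq"), satisfies the test above
   and is therefore named "__gnu_fractsqdq2", whereas a conversion from
   SFmode ("sf") to DQmode involves a non-fixed-point mode and is named
   "__gnu_fractsfdq", with no "2" suffix.  */
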
2469 /* Set up library functions unique to ARM. */
2470
2471 static void
2472 arm_init_libfuncs (void)
2473 {
2474 /* For Linux, we have access to kernel support for atomic operations. */
2475 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2476 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2477
2478 /* There are no special library functions unless we are using the
2479 ARM BPABI. */
2480 if (!TARGET_BPABI)
2481 return;
2482
2483 /* The functions below are described in Section 4 of the "Run-Time
2484 ABI for the ARM architecture", Version 1.0. */
2485
2486 /* Double-precision floating-point arithmetic. Table 2. */
2487 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2488 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2489 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2490 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2491 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2492
2493 /* Double-precision comparisons. Table 3. */
2494 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2495 set_optab_libfunc (ne_optab, DFmode, NULL);
2496 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2497 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2498 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2499 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2500 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2501
2502 /* Single-precision floating-point arithmetic. Table 4. */
2503 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2504 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2505 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2506 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2507 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2508
2509 /* Single-precision comparisons. Table 5. */
2510 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2511 set_optab_libfunc (ne_optab, SFmode, NULL);
2512 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2513 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2514 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2515 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2516 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2517
2518 /* Floating-point to integer conversions. Table 6. */
2519 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2520 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2521 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2522 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2523 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2524 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2525 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2526 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2527
2528 /* Conversions between floating types. Table 7. */
2529 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2530 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2531
2532 /* Integer to floating-point conversions. Table 8. */
2533 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2534 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2535 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2537 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2538 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2539 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2541
2542 /* Long long. Table 9. */
2543 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2544 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2545 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2546 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2547 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2548 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2549 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2550 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2551
2552 /* Integer (32/32->32) division. \S 4.3.1. */
2553 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2554 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2555
2556 /* The divmod functions are designed so that they can be used for
2557 plain division, even though they return both the quotient and the
2558 remainder. The quotient is returned in the usual location (i.e.,
2559 r0 for SImode, {r0, r1} for DImode), just as would be expected
2560 for an ordinary division routine. Because the AAPCS calling
2561 conventions specify that all of { r0, r1, r2, r3 } are
2562 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2563 explicitly that those registers are clobbered by these
2564 routines. */
2565 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2566 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2567
2568 /* For SImode division the ABI provides div-without-mod routines,
2569 which are faster. */
2570 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2571 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
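
/* For illustration (assumed from the Run-time ABI document rather than from
   this file): the routines registered above behave roughly like

       int __aeabi_idiv (int numerator, int denominator);
       typedef struct { int quot; int rem; } idiv_return;
       idiv_return __aeabi_idivmod (int numerator, int denominator);

   with the quotient coming back in r0 and, for the divmod variants, the
   remainder in the register(s) that follow, as described in the comment on
   the DImode divmod routines above.  */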
2572
2573 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2574 divmod libcalls instead. */
2575 set_optab_libfunc (smod_optab, DImode, NULL);
2576 set_optab_libfunc (umod_optab, DImode, NULL);
2577 set_optab_libfunc (smod_optab, SImode, NULL);
2578 set_optab_libfunc (umod_optab, SImode, NULL);
2579
2580 /* Half-precision float operations. The compiler handles all operations
2581 with NULL libfuncs by converting to SFmode. */
2582 switch (arm_fp16_format)
2583 {
2584 case ARM_FP16_FORMAT_IEEE:
2585 case ARM_FP16_FORMAT_ALTERNATIVE:
2586
2587 /* Conversions. */
2588 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2589 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2590 ? "__gnu_f2h_ieee"
2591 : "__gnu_f2h_alternative"));
2592 set_conv_libfunc (sext_optab, SFmode, HFmode,
2593 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2594 ? "__gnu_h2f_ieee"
2595 : "__gnu_h2f_alternative"));
2596
2597 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2598 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2599 ? "__gnu_d2h_ieee"
2600 : "__gnu_d2h_alternative"));
2601
2602 /* Arithmetic. */
2603 set_optab_libfunc (add_optab, HFmode, NULL);
2604 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2605 set_optab_libfunc (smul_optab, HFmode, NULL);
2606 set_optab_libfunc (neg_optab, HFmode, NULL);
2607 set_optab_libfunc (sub_optab, HFmode, NULL);
2608
2609 /* Comparisons. */
2610 set_optab_libfunc (eq_optab, HFmode, NULL);
2611 set_optab_libfunc (ne_optab, HFmode, NULL);
2612 set_optab_libfunc (lt_optab, HFmode, NULL);
2613 set_optab_libfunc (le_optab, HFmode, NULL);
2614 set_optab_libfunc (ge_optab, HFmode, NULL);
2615 set_optab_libfunc (gt_optab, HFmode, NULL);
2616 set_optab_libfunc (unord_optab, HFmode, NULL);
2617 break;
2618
2619 default:
2620 break;
2621 }
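
/* Illustration of the effect of the NULL entries above: with no HFmode
   libfuncs registered, a half-precision operation such as an addition is
   carried out by widening both operands to SFmode (using the __gnu_h2f_*
   helper registered above when no hardware conversion is available), doing
   the arithmetic in SFmode, and truncating the result back via
   __gnu_f2h_*.  */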
2622
2623 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2624 {
2625 const arm_fixed_mode_set fixed_arith_modes[] =
2626 {
2627 { E_QQmode, "qq" },
2628 { E_UQQmode, "uqq" },
2629 { E_HQmode, "hq" },
2630 { E_UHQmode, "uhq" },
2631 { E_SQmode, "sq" },
2632 { E_USQmode, "usq" },
2633 { E_DQmode, "dq" },
2634 { E_UDQmode, "udq" },
2635 { E_TQmode, "tq" },
2636 { E_UTQmode, "utq" },
2637 { E_HAmode, "ha" },
2638 { E_UHAmode, "uha" },
2639 { E_SAmode, "sa" },
2640 { E_USAmode, "usa" },
2641 { E_DAmode, "da" },
2642 { E_UDAmode, "uda" },
2643 { E_TAmode, "ta" },
2644 { E_UTAmode, "uta" }
2645 };
2646 const arm_fixed_mode_set fixed_conv_modes[] =
2647 {
2648 { E_QQmode, "qq" },
2649 { E_UQQmode, "uqq" },
2650 { E_HQmode, "hq" },
2651 { E_UHQmode, "uhq" },
2652 { E_SQmode, "sq" },
2653 { E_USQmode, "usq" },
2654 { E_DQmode, "dq" },
2655 { E_UDQmode, "udq" },
2656 { E_TQmode, "tq" },
2657 { E_UTQmode, "utq" },
2658 { E_HAmode, "ha" },
2659 { E_UHAmode, "uha" },
2660 { E_SAmode, "sa" },
2661 { E_USAmode, "usa" },
2662 { E_DAmode, "da" },
2663 { E_UDAmode, "uda" },
2664 { E_TAmode, "ta" },
2665 { E_UTAmode, "uta" },
2666 { E_QImode, "qi" },
2667 { E_HImode, "hi" },
2668 { E_SImode, "si" },
2669 { E_DImode, "di" },
2670 { E_TImode, "ti" },
2671 { E_SFmode, "sf" },
2672 { E_DFmode, "df" }
2673 };
2674 unsigned int i, j;
2675
2676 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2677 {
2678 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2679 "add", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2681 "ssadd", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2683 "usadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2685 "sub", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2687 "sssub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2689 "ussub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2691 "mul", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2693 "ssmul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2695 "usmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2697 "div", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2699 "udiv", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2701 "ssdiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2703 "usdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2705 "neg", fixed_arith_modes[i].name, 2);
2706 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2707 "ssneg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2709 "usneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2711 "ashl", fixed_arith_modes[i].name, 3);
2712 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2713 "ashr", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2715 "lshr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2717 "ssashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2719 "usashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2721 "cmp", fixed_arith_modes[i].name, 2);
2722 }
2723
2724 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2725 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2726 {
2727 if (i == j
2728 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2729 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2730 continue;
2731
2732 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2733 fixed_conv_modes[j].mode, "fract",
2734 fixed_conv_modes[i].name,
2735 fixed_conv_modes[j].name);
2736 arm_set_fixed_conv_libfunc (satfract_optab,
2737 fixed_conv_modes[i].mode,
2738 fixed_conv_modes[j].mode, "satfract",
2739 fixed_conv_modes[i].name,
2740 fixed_conv_modes[j].name);
2741 arm_set_fixed_conv_libfunc (fractuns_optab,
2742 fixed_conv_modes[i].mode,
2743 fixed_conv_modes[j].mode, "fractuns",
2744 fixed_conv_modes[i].name,
2745 fixed_conv_modes[j].name);
2746 arm_set_fixed_conv_libfunc (satfractuns_optab,
2747 fixed_conv_modes[i].mode,
2748 fixed_conv_modes[j].mode, "satfractuns",
2749 fixed_conv_modes[i].name,
2750 fixed_conv_modes[j].name);
2751 }
2752 }
2753
2754 if (TARGET_AAPCS_BASED)
2755 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2756 }
2757
2758 /* On AAPCS systems, this is the "struct __va_list". */
2759 static GTY(()) tree va_list_type;
2760
2761 /* Return the type to use as __builtin_va_list. */
2762 static tree
2763 arm_build_builtin_va_list (void)
2764 {
2765 tree va_list_name;
2766 tree ap_field;
2767
2768 if (!TARGET_AAPCS_BASED)
2769 return std_build_builtin_va_list ();
2770
2771 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2772 defined as:
2773
2774 struct __va_list
2775 {
2776 void *__ap;
2777 };
2778
2779 The C Library ABI further reinforces this definition in \S
2780 4.1.
2781
2782 We must follow this definition exactly. The structure tag
2783 name is visible in C++ mangled names, and thus forms a part
2784 of the ABI. The field name may be used by people who
2785 #include <stdarg.h>. */
2786 /* Create the type. */
2787 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2788 /* Give it the required name. */
2789 va_list_name = build_decl (BUILTINS_LOCATION,
2790 TYPE_DECL,
2791 get_identifier ("__va_list"),
2792 va_list_type);
2793 DECL_ARTIFICIAL (va_list_name) = 1;
2794 TYPE_NAME (va_list_type) = va_list_name;
2795 TYPE_STUB_DECL (va_list_type) = va_list_name;
2796 /* Create the __ap field. */
2797 ap_field = build_decl (BUILTINS_LOCATION,
2798 FIELD_DECL,
2799 get_identifier ("__ap"),
2800 ptr_type_node);
2801 DECL_ARTIFICIAL (ap_field) = 1;
2802 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2803 TYPE_FIELDS (va_list_type) = ap_field;
2804 /* Compute its layout. */
2805 layout_type (va_list_type);
2806
2807 return va_list_type;
2808 }
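
/* For illustration: user code on an AAPCS target that does

       #include <stdarg.h>
       void f (int last, ...)
       {
         va_list ap;
         va_start (ap, last);
         int x = va_arg (ap, int);
         (void) x;
         va_end (ap);
       }

   is therefore manipulating a one-word structure wrapping a single void *
   cursor, which is what the __ap field built above represents.  */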
2809
2810 /* Return an expression of type "void *" pointing to the next
2811 available argument in a variable-argument list. VALIST is the
2812 user-level va_list object, of type __builtin_va_list. */
2813 static tree
2814 arm_extract_valist_ptr (tree valist)
2815 {
2816 if (TREE_TYPE (valist) == error_mark_node)
2817 return error_mark_node;
2818
2819 /* On an AAPCS target, the pointer is stored within "struct
2820 va_list". */
2821 if (TARGET_AAPCS_BASED)
2822 {
2823 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2824 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2825 valist, ap_field, NULL_TREE);
2826 }
2827
2828 return valist;
2829 }
2830
2831 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2832 static void
2833 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2834 {
2835 valist = arm_extract_valist_ptr (valist);
2836 std_expand_builtin_va_start (valist, nextarg);
2837 }
2838
2839 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2840 static tree
2841 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2842 gimple_seq *post_p)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2846 }
2847
2848 /* Check any incompatible options that the user has specified. */
2849 static void
2850 arm_option_check_internal (struct gcc_options *opts)
2851 {
2852 int flags = opts->x_target_flags;
2853
2854 /* iWMMXt and NEON are incompatible. */
2855 if (TARGET_IWMMXT
2856 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2857 error ("iWMMXt and NEON are incompatible");
2858
2859 /* Make sure that the processor choice does not conflict with any of the
2860 other command line choices. */
2861 if (TARGET_ARM_P (flags)
2862 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2863 error ("target CPU does not support ARM mode");
2864
2865 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2866 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2867 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2868
2869 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2870 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2871
2872 /* If this target is normally configured to use APCS frames, warn if they
2873 are turned off and debugging is turned on. */
2874 if (TARGET_ARM_P (flags)
2875 && write_symbols != NO_DEBUG
2876 && !TARGET_APCS_FRAME
2877 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2878 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879
2880 /* iWMMXt unsupported under Thumb mode. */
2881 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2882 error ("iWMMXt unsupported under Thumb mode");
2883
2884 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2885 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886
2887 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2888 {
2889 error ("RTP PIC is incompatible with Thumb");
2890 flag_pic = 0;
2891 }
2892
2893 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2894 with MOVT. */
2895 if ((target_pure_code || target_slow_flash_data)
2896 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900 error ("%s only supports non-pic code on M-profile targets with the "
2901 "MOVT instruction", flag);
2902 }
2903
2904 }
2905
2906 /* Recompute the global settings depending on target attribute options. */
2907
2908 static void
2909 arm_option_params_internal (void)
2910 {
2911 /* If we are not using the default (ARM mode) section anchor offset
2912 ranges, then set the correct ranges now. */
2913 if (TARGET_THUMB1)
2914 {
2915 /* Thumb-1 LDR instructions cannot have negative offsets.
2916 Permissible positive offset ranges are 5-bit (for byte loads),
2917 6-bit (for halfword loads), or 7-bit (for word loads).
2918 Empirical results suggest a 7-bit anchor range gives the best
2919 overall code size. */
2920 targetm.min_anchor_offset = 0;
2921 targetm.max_anchor_offset = 127;
2922 }
2923 else if (TARGET_THUMB2)
2924 {
2925 /* The minimum is set such that the total size of the block
2926 for a particular anchor is 248 + 1 + 4095 bytes, which is
2927 divisible by eight, ensuring natural spacing of anchors. */
2928 targetm.min_anchor_offset = -248;
2929 targetm.max_anchor_offset = 4095;
2930 }
2931 else
2932 {
2933 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2934 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2935 }
2936
2937 /* Increase the number of conditional instructions with -Os. */
2938 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2939
2940 /* For THUMB2, we limit the conditional sequence to one IT block. */
2941 if (TARGET_THUMB2)
2942 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2943 }
2944
2945 /* True if -mflip-thumb should next add an attribute for the default
2946 mode, false if it should next add an attribute for the opposite mode. */
2947 static GTY(()) bool thumb_flipper;
2948
2949 /* Options after initial target override. */
2950 static GTY(()) tree init_optimize;
2951
2952 static void
2953 arm_override_options_after_change_1 (struct gcc_options *opts)
2954 {
2955 /* -falign-functions without argument: supply one. */
2956 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2957 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2958 && opts->x_optimize_size ? "2" : "4";
2959 }
2960
2961 /* Implement targetm.override_options_after_change. */
2962
2963 static void
2964 arm_override_options_after_change (void)
2965 {
2966 arm_configure_build_target (&arm_active_target,
2967 TREE_TARGET_OPTION (target_option_default_node),
2968 &global_options_set, false);
2969
2970 arm_override_options_after_change_1 (&global_options);
2971 }
2972
2973 /* Implement TARGET_OPTION_SAVE. */
2974 static void
2975 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2976 {
2977 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2978 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2979 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2980 }
2981
2982 /* Implement TARGET_OPTION_RESTORE. */
2983 static void
2984 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2985 {
2986 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2987 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2988 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2989 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2990 false);
2991 }
2992
2993 /* Reset options between modes that the user has specified. */
2994 static void
2995 arm_option_override_internal (struct gcc_options *opts,
2996 struct gcc_options *opts_set)
2997 {
2998 arm_override_options_after_change_1 (opts);
2999
3000 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3001 {
3002 /* The default is to enable interworking, so this warning message would
3003 be confusing to users who have just compiled with
3004 e.g., -march=armv4. */
3005 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3006 opts->x_target_flags &= ~MASK_INTERWORK;
3007 }
3008
3009 if (TARGET_THUMB_P (opts->x_target_flags)
3010 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3011 {
3012 warning (0, "target CPU does not support THUMB instructions");
3013 opts->x_target_flags &= ~MASK_THUMB;
3014 }
3015
3016 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3017 {
3018 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3019 opts->x_target_flags &= ~MASK_APCS_FRAME;
3020 }
3021
3022 /* Callee super interworking implies thumb interworking. Adding
3023 this to the flags here simplifies the logic elsewhere. */
3024 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3025 opts->x_target_flags |= MASK_INTERWORK;
3026
3027 /* Need to remember initial values so combinations of options like
3028 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3029 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3030
3031 if (! opts_set->x_arm_restrict_it)
3032 opts->x_arm_restrict_it = arm_arch8;
3033
3034 /* ARM execution state and M profile don't have [restrict] IT. */
3035 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3036 opts->x_arm_restrict_it = 0;
3037
3038 /* Enable -munaligned-access by default for
3039 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3040 i.e. Thumb2 and ARM state only.
3041 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3042 - ARMv8 architecture-based processors.
3043
3044 Disable -munaligned-access by default for
3045 - all pre-ARMv6 architecture-based processors
3046 - ARMv6-M architecture-based processors
3047 - ARMv8-M Baseline processors. */
3048
3049 if (! opts_set->x_unaligned_access)
3050 {
3051 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3052 && arm_arch6 && (arm_arch_notm || arm_arch7));
3053 }
3054 else if (opts->x_unaligned_access == 1
3055 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3056 {
3057 warning (0, "target CPU does not support unaligned accesses");
3058 opts->x_unaligned_access = 0;
3059 }
3060
3061 /* Don't warn since it's on by default in -O2. */
3062 if (TARGET_THUMB1_P (opts->x_target_flags))
3063 opts->x_flag_schedule_insns = 0;
3064 else
3065 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3066
3067 /* Disable shrink-wrap when optimizing function for size, since it tends to
3068 generate additional returns. */
3069 if (optimize_function_for_size_p (cfun)
3070 && TARGET_THUMB2_P (opts->x_target_flags))
3071 opts->x_flag_shrink_wrap = false;
3072 else
3073 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3074
3075 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3076 - epilogue_insns - does not accurately model the corresponding insns
3077 emitted in the asm file. In particular, see the comment in thumb_exit
3078 'Find out how many of the (return) argument registers we can corrupt'.
3079 As a consequence, the epilogue may clobber registers without fipa-ra
3080 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3081 TODO: Accurately model clobbers for epilogue_insns and reenable
3082 fipa-ra. */
3083 if (TARGET_THUMB1_P (opts->x_target_flags))
3084 opts->x_flag_ipa_ra = 0;
3085 else
3086 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3087
3088 /* Thumb2 inline assembly code should always use unified syntax.
3089 This will apply to ARM and Thumb1 eventually. */
3090 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3091
3092 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3093 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3094 #endif
3095 }
3096
3097 static sbitmap isa_all_fpubits;
3098 static sbitmap isa_quirkbits;
3099
3100 /* Configure a build target TARGET from the user-specified options OPTS and
3101 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3102 architecture have been specified, but the two are not identical. */
3103 void
3104 arm_configure_build_target (struct arm_build_target *target,
3105 struct cl_target_option *opts,
3106 struct gcc_options *opts_set,
3107 bool warn_compatible)
3108 {
3109 const cpu_option *arm_selected_tune = NULL;
3110 const arch_option *arm_selected_arch = NULL;
3111 const cpu_option *arm_selected_cpu = NULL;
3112 const arm_fpu_desc *arm_selected_fpu = NULL;
3113 const char *tune_opts = NULL;
3114 const char *arch_opts = NULL;
3115 const char *cpu_opts = NULL;
3116
3117 bitmap_clear (target->isa);
3118 target->core_name = NULL;
3119 target->arch_name = NULL;
3120
3121 if (opts_set->x_arm_arch_string)
3122 {
3123 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3124 "-march",
3125 opts->x_arm_arch_string);
3126 arch_opts = strchr (opts->x_arm_arch_string, '+');
3127 }
3128
3129 if (opts_set->x_arm_cpu_string)
3130 {
3131 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3132 opts->x_arm_cpu_string);
3133 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3134 arm_selected_tune = arm_selected_cpu;
3135 /* If taking the tuning from -mcpu, we don't need to rescan the
3136 options for tuning. */
3137 }
3138
3139 if (opts_set->x_arm_tune_string)
3140 {
3141 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3142 opts->x_arm_tune_string);
3143 tune_opts = strchr (opts->x_arm_tune_string, '+');
3144 }
3145
3146 if (arm_selected_arch)
3147 {
3148 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3150 arch_opts);
3151
3152 if (arm_selected_cpu)
3153 {
3154 auto_sbitmap cpu_isa (isa_num_bits);
3155 auto_sbitmap isa_delta (isa_num_bits);
3156
3157 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3158 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3159 cpu_opts);
3160 bitmap_xor (isa_delta, cpu_isa, target->isa);
3161 /* Ignore any bits that are quirk bits. */
3162 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3163 /* Ignore (for now) any bits that might be set by -mfpu. */
3164 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3165
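/* At this point ISA_DELTA holds every architectural capability on which the
   named CPU and the named -march value disagree, ignoring quirk and FPU
   bits.  As an illustrative example, combining something like
   -mcpu=cortex-a8 with -march=armv7-m would leave a non-empty delta and so
   take the warning path below.  */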
3166 if (!bitmap_empty_p (isa_delta))
3167 {
3168 if (warn_compatible)
3169 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3170 arm_selected_cpu->common.name,
3171 arm_selected_arch->common.name);
3172 /* -march wins for code generation.
3173 -mcpu wins for default tuning. */
3174 if (!arm_selected_tune)
3175 arm_selected_tune = arm_selected_cpu;
3176
3177 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3178 target->arch_name = arm_selected_arch->common.name;
3179 }
3180 else
3181 {
3182 /* Architecture and CPU are essentially the same.
3183 Prefer the CPU setting. */
3184 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3185 target->core_name = arm_selected_cpu->common.name;
3186 /* Copy the CPU's capabilities, so that we inherit the
3187 appropriate extensions and quirks. */
3188 bitmap_copy (target->isa, cpu_isa);
3189 }
3190 }
3191 else
3192 {
3193 /* Pick a CPU based on the architecture. */
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 /* Note: target->core_name is left unset in this path. */
3197 }
3198 }
3199 else if (arm_selected_cpu)
3200 {
3201 target->core_name = arm_selected_cpu->common.name;
3202 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3203 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3204 cpu_opts);
3205 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3206 }
3207 /* If the user did not specify a processor or architecture, choose
3208 one for them. */
3209 else
3210 {
3211 const cpu_option *sel;
3212 auto_sbitmap sought_isa (isa_num_bits);
3213 bitmap_clear (sought_isa);
3214 auto_sbitmap default_isa (isa_num_bits);
3215
3216 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3217 TARGET_CPU_DEFAULT);
3218 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3219 gcc_assert (arm_selected_cpu->common.name);
3220
3221 /* RWE: All of the selection logic below (to the end of this
3222 'if' clause) looks somewhat suspect. It appears to be mostly
3223 there to support forcing thumb support when the default CPU
3224 does not have thumb (somewhat dubious in terms of what the
3225 user might be expecting). I think it should be removed once
3226 support for the pre-thumb era cores is removed. */
3227 sel = arm_selected_cpu;
3228 arm_initialize_isa (default_isa, sel->common.isa_bits);
3229 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3230 cpu_opts);
3231
3232 /* Now check to see if the user has specified any command line
3233 switches that require certain abilities from the cpu. */
3234
3235 if (TARGET_INTERWORK || TARGET_THUMB)
3236 bitmap_set_bit (sought_isa, isa_bit_thumb);
3237
3238 /* If there are such requirements and the default CPU does not
3239 satisfy them, we need to run over the complete list of
3240 cores looking for one that is satisfactory. */
3241 if (!bitmap_empty_p (sought_isa)
3242 && !bitmap_subset_p (sought_isa, default_isa))
3243 {
3244 auto_sbitmap candidate_isa (isa_num_bits);
3245 /* We're only interested in a CPU with at least the
3246 capabilities of the default CPU and the required
3247 additional features. */
3248 bitmap_ior (default_isa, default_isa, sought_isa);
3249
3250 /* Try to locate a CPU type that supports all of the abilities
3251 of the default CPU, plus the extra abilities requested by
3252 the user. */
3253 for (sel = all_cores; sel->common.name != NULL; sel++)
3254 {
3255 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3256 /* An exact match? */
3257 if (bitmap_equal_p (default_isa, candidate_isa))
3258 break;
3259 }
3260
3261 if (sel->common.name == NULL)
3262 {
3263 unsigned current_bit_count = isa_num_bits;
3264 const cpu_option *best_fit = NULL;
3265
3266 /* Ideally we would like to issue an error message here
3267 saying that it was not possible to find a CPU compatible
3268 with the default CPU, but which also supports the command
3269 line options specified by the programmer, and so they
3270 ought to use the -mcpu=<name> command line option to
3271 override the default CPU type.
3272
3273 If we cannot find a CPU that has exactly the
3274 characteristics of the default CPU and the given
3275 command line options we scan the array again looking
3276 for a best match. The best match must have at least
3277 the capabilities of the perfect match. */
3278 for (sel = all_cores; sel->common.name != NULL; sel++)
3279 {
3280 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3281
3282 if (bitmap_subset_p (default_isa, candidate_isa))
3283 {
3284 unsigned count;
3285
3286 bitmap_and_compl (candidate_isa, candidate_isa,
3287 default_isa);
3288 count = bitmap_popcount (candidate_isa);
3289
3290 if (count < current_bit_count)
3291 {
3292 best_fit = sel;
3293 current_bit_count = count;
3294 }
3295 }
3296
3297 gcc_assert (best_fit);
3298 sel = best_fit;
3299 }
3300 }
3301 arm_selected_cpu = sel;
3302 }
3303
3304 /* Now we know the CPU, we can finally initialize the target
3305 structure. */
3306 target->core_name = arm_selected_cpu->common.name;
3307 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3308 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3309 cpu_opts);
3310 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3311 }
3312
3313 gcc_assert (arm_selected_cpu);
3314 gcc_assert (arm_selected_arch);
3315
3316 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3317 {
3318 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3319 auto_sbitmap fpu_bits (isa_num_bits);
3320
3321 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3322 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3323 bitmap_ior (target->isa, target->isa, fpu_bits);
3324 }
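/* Illustrative sketch (not exhaustive): with -march=armv7-a+vfpv3 -mfpu=neon,
the vfpv3 bits implied by the architecture extension are cleared by the
bitmap_and_compl above and replaced by neon's bits, so an explicit -mfpu
always wins over FPU capabilities implied by -march/-mcpu. */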
3325
3326 if (!arm_selected_tune)
3327 arm_selected_tune = arm_selected_cpu;
3328 else /* Validate the features passed to -mtune. */
3329 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3330
3331 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3332
3333 /* Finish initializing the target structure. */
3334 target->arch_pp_name = arm_selected_arch->arch;
3335 target->base_arch = arm_selected_arch->base_arch;
3336 target->profile = arm_selected_arch->profile;
3337
3338 target->tune_flags = tune_data->tune_flags;
3339 target->tune = tune_data->tune;
3340 target->tune_core = tune_data->scheduler;
3341 arm_option_reconfigure_globals ();
3342 }
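
/* A worked example of the resolution above (a sketch, not from this file):
for "-march=armv7-a -mcpu=cortex-a53" the cortex-a53 ISA contains ARMv8-A
bits that armv7-a lacks, so isa_delta is non-empty, a -mcpu/-march conflict
warning is issued, code is generated for armv7-a, and cortex-a53 is used
only to pick the default tuning. */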
3343
3344 /* Fix up any incompatible options that the user has specified. */
3345 static void
3346 arm_option_override (void)
3347 {
3348 static const enum isa_feature fpu_bitlist[]
3349 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3350 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3351 cl_target_option opts;
3352
3353 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3354 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3355
3356 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3357 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3358
3359 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3360
3361 if (!global_options_set.x_arm_fpu_index)
3362 {
3363 bool ok;
3364 int fpu_index;
3365
3366 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3367 CL_TARGET);
3368 gcc_assert (ok);
3369 arm_fpu_index = (enum fpu_type) fpu_index;
3370 }
3371
3372 cl_target_option_save (&opts, &global_options);
3373 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3374 true);
3375
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS;
3378 #endif
3379
3380 /* Initialize boolean versions of the architectural flags, for use
3381 in the arm.md file and for enabling feature flags. */
3382 arm_option_reconfigure_globals ();
3383
3384 arm_tune = arm_active_target.tune_core;
3385 tune_flags = arm_active_target.tune_flags;
3386 current_tune = arm_active_target.tune;
3387
3388 /* TBD: Dwarf info for apcs frame is not handled yet. */
3389 if (TARGET_APCS_FRAME)
3390 flag_shrink_wrap = false;
3391
3392 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3393 {
3394 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3395 target_flags |= MASK_APCS_FRAME;
3396 }
3397
3398 if (TARGET_POKE_FUNCTION_NAME)
3399 target_flags |= MASK_APCS_FRAME;
3400
3401 if (TARGET_APCS_REENT && flag_pic)
3402 error ("-fpic and -mapcs-reent are incompatible");
3403
3404 if (TARGET_APCS_REENT)
3405 warning (0, "APCS reentrant code not supported. Ignored");
3406
3407 /* Set up some tuning parameters. */
3408 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3409 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3410 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3411 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3412 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3413 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3414
3415 /* For arm2/3 there is no need to do any scheduling if we are doing
3416 software floating-point. */
3417 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3418 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3419
3420 /* Override the default structure alignment for AAPCS ABI. */
3421 if (!global_options_set.x_arm_structure_size_boundary)
3422 {
3423 if (TARGET_AAPCS_BASED)
3424 arm_structure_size_boundary = 8;
3425 }
3426 else
3427 {
3428 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3429
3430 if (arm_structure_size_boundary != 8
3431 && arm_structure_size_boundary != 32
3432 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3433 {
3434 if (ARM_DOUBLEWORD_ALIGN)
3435 warning (0,
3436 "structure size boundary can only be set to 8, 32 or 64");
3437 else
3438 warning (0, "structure size boundary can only be set to 8 or 32");
3439 arm_structure_size_boundary
3440 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3441 }
3442 }
3443
3444 if (TARGET_VXWORKS_RTP)
3445 {
3446 if (!global_options_set.x_arm_pic_data_is_text_relative)
3447 arm_pic_data_is_text_relative = 0;
3448 }
3449 else if (flag_pic
3450 && !arm_pic_data_is_text_relative
3451 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3452 /* When text & data segments don't have a fixed displacement, the
3453 intended use is with a single, read-only PIC base register.
3454 Unless the user explicitly requested not to do that, set
3455 it. */
3456 target_flags |= MASK_SINGLE_PIC_BASE;
3457
3458 /* If stack checking is disabled, we can use r10 as the PIC register,
3459 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3460 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3461 {
3462 if (TARGET_VXWORKS_RTP)
3463 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3464 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3465 }
3466
3467 if (flag_pic && TARGET_VXWORKS_RTP)
3468 arm_pic_register = 9;
3469
3470 if (arm_pic_register_string != NULL)
3471 {
3472 int pic_register = decode_reg_name (arm_pic_register_string);
3473
3474 if (!flag_pic)
3475 warning (0, "-mpic-register= is useless without -fpic");
3476
3477 /* Prevent the user from choosing an obviously stupid PIC register. */
3478 else if (pic_register < 0 || call_used_regs[pic_register]
3479 || pic_register == HARD_FRAME_POINTER_REGNUM
3480 || pic_register == STACK_POINTER_REGNUM
3481 || pic_register >= PC_REGNUM
3482 || (TARGET_VXWORKS_RTP
3483 && (unsigned int) pic_register != arm_pic_register))
3484 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3485 else
3486 arm_pic_register = pic_register;
3487 }
3488
3489 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3490 if (fix_cm3_ldrd == 2)
3491 {
3492 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3493 fix_cm3_ldrd = 1;
3494 else
3495 fix_cm3_ldrd = 0;
3496 }
3497
3498 /* Hot/Cold partitioning is not currently supported, since we can't
3499 handle literal pool placement in that case. */
3500 if (flag_reorder_blocks_and_partition)
3501 {
3502 inform (input_location,
3503 "-freorder-blocks-and-partition not supported on this architecture");
3504 flag_reorder_blocks_and_partition = 0;
3505 flag_reorder_blocks = 1;
3506 }
3507
3508 if (flag_pic)
3509 /* Hoisting PIC address calculations more aggressively provides a small,
3510 but measurable, size reduction for PIC code. Therefore, we decrease
3511 the bar for unrestricted expression hoisting to the cost of PIC address
3512 calculation, which is 2 instructions. */
3513 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3514 global_options.x_param_values,
3515 global_options_set.x_param_values);
3516
3517 /* ARM EABI defaults to strict volatile bitfields. */
3518 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3519 && abi_version_at_least(2))
3520 flag_strict_volatile_bitfields = 1;
3521
3522 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3523 have deemed it beneficial (signified by setting
3524 prefetch.num_slots to 1 or more). */
3525 if (flag_prefetch_loop_arrays < 0
3526 && HAVE_prefetch
3527 && optimize >= 3
3528 && current_tune->prefetch.num_slots > 0)
3529 flag_prefetch_loop_arrays = 1;
3530
3531 /* Set up parameters to be used in the prefetching algorithm. Do not
3532 override the defaults unless we are tuning for a core we have
3533 researched values for. */
3534 if (current_tune->prefetch.num_slots > 0)
3535 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3536 current_tune->prefetch.num_slots,
3537 global_options.x_param_values,
3538 global_options_set.x_param_values);
3539 if (current_tune->prefetch.l1_cache_line_size >= 0)
3540 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3541 current_tune->prefetch.l1_cache_line_size,
3542 global_options.x_param_values,
3543 global_options_set.x_param_values);
3544 if (current_tune->prefetch.l1_cache_size >= 0)
3545 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3546 current_tune->prefetch.l1_cache_size,
3547 global_options.x_param_values,
3548 global_options_set.x_param_values);
3549
3550 /* Use Neon to perform 64-bit operations rather than core
3551 registers. */
3552 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3553 if (use_neon_for_64bits == 1)
3554 prefer_neon_for_64bits = true;
3555
3556 /* Use the alternative scheduling-pressure algorithm by default. */
3557 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3560
3561 /* Look through the ready list and all of the queue for instructions
3562 relevant to the L2 auto-prefetcher. */
3563 int param_sched_autopref_queue_depth;
3564
3565 switch (current_tune->sched_autopref)
3566 {
3567 case tune_params::SCHED_AUTOPREF_OFF:
3568 param_sched_autopref_queue_depth = -1;
3569 break;
3570
3571 case tune_params::SCHED_AUTOPREF_RANK:
3572 param_sched_autopref_queue_depth = 0;
3573 break;
3574
3575 case tune_params::SCHED_AUTOPREF_FULL:
3576 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3577 break;
3578
3579 default:
3580 gcc_unreachable ();
3581 }
3582
3583 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3584 param_sched_autopref_queue_depth,
3585 global_options.x_param_values,
3586 global_options_set.x_param_values);
3587
3588 /* Currently, for slow flash data, we just disable literal pools. We also
3589 disable them for pure-code. */
3590 if (target_slow_flash_data || target_pure_code)
3591 arm_disable_literal_pool = true;
3592
3593 /* Disable scheduling fusion by default if this is not an armv7 processor
3594 or it doesn't prefer ldrd/strd. */
3595 if (flag_schedule_fusion == 2
3596 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3597 flag_schedule_fusion = 0;
3598
3599 /* Need to remember initial options before they are overridden. */
3600 init_optimize = build_optimization_node (&global_options);
3601
3602 arm_options_perform_arch_sanity_checks ();
3603 arm_option_override_internal (&global_options, &global_options_set);
3604 arm_option_check_internal (&global_options);
3605 arm_option_params_internal ();
3606
3607 /* Create the default target_options structure. */
3608 target_option_default_node = target_option_current_node
3609 = build_target_option_node (&global_options);
3610
3611 /* Register global variables with the garbage collector. */
3612 arm_add_gc_roots ();
3613
3614 /* Init initial mode for testing. */
3615 thumb_flipper = TARGET_THUMB;
3616 }
3617
3618
3619 /* Reconfigure global status flags from the active_target.isa. */
3620 void
3621 arm_option_reconfigure_globals (void)
3622 {
3623 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
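/* For example (illustrative): arch_pp_name "7A" yields "__ARM_ARCH_7A__". */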
3624 arm_base_arch = arm_active_target.base_arch;
3625
3626 /* Initialize boolean versions of the architectural flags, for use
3627 in the arm.md file. */
3628 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3629 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3630 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3631 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3632 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3633 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3634 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3635 arm_arch6m = arm_arch6 && !arm_arch_notm;
3636 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3637 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3638 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3639 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3640 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3641 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3642 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3643 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3644 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3645 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3646 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3647 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3648 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3649 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3650 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3651 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3652 if (arm_fp16_inst)
3653 {
3654 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3655 error ("selected fp16 options are incompatible");
3656 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3657 }
3658
3659 /* And finally, set up some quirks. */
3660 arm_arch_no_volatile_ce
3661 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3662 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3663 isa_bit_quirk_armv6kz);
3664
3665 /* Use the cp15 method if it is available. */
3666 if (target_thread_pointer == TP_AUTO)
3667 {
3668 if (arm_arch6k && !TARGET_THUMB1)
3669 target_thread_pointer = TP_CP15;
3670 else
3671 target_thread_pointer = TP_SOFT;
3672 }
3673 }
3674
3675 /* Perform some validation of the desired architecture against the rest of
3676 the options. */
3677 void
3678 arm_options_perform_arch_sanity_checks (void)
3679 {
3680 /* V5T code we generate is completely interworking capable, so we turn off
3681 TARGET_INTERWORK here to avoid many tests later on. */
3682
3683 /* XXX However, we must pass the right pre-processor defines to CPP
3684 or GLD can get confused. This is a hack. */
3685 if (TARGET_INTERWORK)
3686 arm_cpp_interwork = 1;
3687
3688 if (arm_arch5t)
3689 target_flags &= ~MASK_INTERWORK;
3690
3691 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3692 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3693
3694 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3695 error ("iwmmxt abi requires an iwmmxt capable cpu");
3696
3697 /* BPABI targets use linker tricks to allow interworking on cores
3698 without thumb support. */
3699 if (TARGET_INTERWORK
3700 && !TARGET_BPABI
3701 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3702 {
3703 warning (0, "target CPU does not support interworking" );
3704 target_flags &= ~MASK_INTERWORK;
3705 }
3706
3707 /* If soft-float is specified then don't use FPU. */
3708 if (TARGET_SOFT_FLOAT)
3709 arm_fpu_attr = FPU_NONE;
3710 else
3711 arm_fpu_attr = FPU_VFP;
3712
3713 if (TARGET_AAPCS_BASED)
3714 {
3715 if (TARGET_CALLER_INTERWORKING)
3716 error ("AAPCS does not support -mcaller-super-interworking");
3717 else
3718 if (TARGET_CALLEE_INTERWORKING)
3719 error ("AAPCS does not support -mcallee-super-interworking");
3720 }
3721
3722 /* __fp16 support currently assumes the core has ldrh. */
3723 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3724 sorry ("__fp16 and no ldrh");
3725
3726 if (use_cmse && !arm_arch_cmse)
3727 error ("target CPU does not support ARMv8-M Security Extensions");
3728
3729 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3730 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3731 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3732 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3733
3734
3735 if (TARGET_AAPCS_BASED)
3736 {
3737 if (arm_abi == ARM_ABI_IWMMXT)
3738 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3739 else if (TARGET_HARD_FLOAT_ABI)
3740 {
3741 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3742 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3743 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3744 }
3745 else
3746 arm_pcs_default = ARM_PCS_AAPCS;
3747 }
3748 else
3749 {
3750 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3751 sorry ("-mfloat-abi=hard and VFP");
3752
3753 if (arm_abi == ARM_ABI_APCS)
3754 arm_pcs_default = ARM_PCS_APCS;
3755 else
3756 arm_pcs_default = ARM_PCS_ATPCS;
3757 }
3758 }
3759
3760 static void
3761 arm_add_gc_roots (void)
3762 {
3763 gcc_obstack_init(&minipool_obstack);
3764 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3765 }
3766 \f
3767 /* A table of known ARM exception types.
3768 For use with the interrupt function attribute. */
3769
3770 typedef struct
3771 {
3772 const char *const arg;
3773 const unsigned long return_value;
3774 }
3775 isr_attribute_arg;
3776
3777 static const isr_attribute_arg isr_attribute_args [] =
3778 {
3779 { "IRQ", ARM_FT_ISR },
3780 { "irq", ARM_FT_ISR },
3781 { "FIQ", ARM_FT_FIQ },
3782 { "fiq", ARM_FT_FIQ },
3783 { "ABORT", ARM_FT_ISR },
3784 { "abort", ARM_FT_ISR },
3785 { "ABORT", ARM_FT_ISR },
3786 { "abort", ARM_FT_ISR },
3787 { "UNDEF", ARM_FT_EXCEPTION },
3788 { "undef", ARM_FT_EXCEPTION },
3789 { "SWI", ARM_FT_EXCEPTION },
3790 { "swi", ARM_FT_EXCEPTION },
3791 { NULL, ARM_FT_NORMAL }
3792 };
3793
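/* For illustration (a user-level sketch, not code from this file): a handler
is tagged with one of the names above via the function attribute, e.g.

void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

and arm_isr_value below maps the string "IRQ" to ARM_FT_ISR. */
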
3794 /* Returns the (interrupt) function type of the current
3795 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3796
3797 static unsigned long
3798 arm_isr_value (tree argument)
3799 {
3800 const isr_attribute_arg * ptr;
3801 const char * arg;
3802
3803 if (!arm_arch_notm)
3804 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3805
3806 /* No argument - default to IRQ. */
3807 if (argument == NULL_TREE)
3808 return ARM_FT_ISR;
3809
3810 /* Get the value of the argument. */
3811 if (TREE_VALUE (argument) == NULL_TREE
3812 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3813 return ARM_FT_UNKNOWN;
3814
3815 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3816
3817 /* Check it against the list of known arguments. */
3818 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3819 if (streq (arg, ptr->arg))
3820 return ptr->return_value;
3821
3822 /* An unrecognized interrupt type. */
3823 return ARM_FT_UNKNOWN;
3824 }
3825
3826 /* Computes the type of the current function. */
3827
3828 static unsigned long
3829 arm_compute_func_type (void)
3830 {
3831 unsigned long type = ARM_FT_UNKNOWN;
3832 tree a;
3833 tree attr;
3834
3835 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3836
3837 /* Decide if the current function is volatile. Such functions
3838 never return, and many memory cycles can be saved by not storing
3839 register values that will never be needed again. This optimization
3840 was added to speed up context switching in a kernel application. */
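/* For example (illustrative): a function declared __attribute__ ((noreturn))
that cannot throw never returns, so its prologue need not save call-saved
registers that would only be restored on return. */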
3841 if (optimize > 0
3842 && (TREE_NOTHROW (current_function_decl)
3843 || !(flag_unwind_tables
3844 || (flag_exceptions
3845 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3846 && TREE_THIS_VOLATILE (current_function_decl))
3847 type |= ARM_FT_VOLATILE;
3848
3849 if (cfun->static_chain_decl != NULL)
3850 type |= ARM_FT_NESTED;
3851
3852 attr = DECL_ATTRIBUTES (current_function_decl);
3853
3854 a = lookup_attribute ("naked", attr);
3855 if (a != NULL_TREE)
3856 type |= ARM_FT_NAKED;
3857
3858 a = lookup_attribute ("isr", attr);
3859 if (a == NULL_TREE)
3860 a = lookup_attribute ("interrupt", attr);
3861
3862 if (a == NULL_TREE)
3863 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3864 else
3865 type |= arm_isr_value (TREE_VALUE (a));
3866
3867 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3868 type |= ARM_FT_CMSE_ENTRY;
3869
3870 return type;
3871 }
3872
3873 /* Returns the type of the current function. */
3874
3875 unsigned long
3876 arm_current_func_type (void)
3877 {
3878 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3879 cfun->machine->func_type = arm_compute_func_type ();
3880
3881 return cfun->machine->func_type;
3882 }
3883
3884 bool
3885 arm_allocate_stack_slots_for_args (void)
3886 {
3887 /* Naked functions should not allocate stack slots for arguments. */
3888 return !IS_NAKED (arm_current_func_type ());
3889 }
3890
3891 static bool
3892 arm_warn_func_return (tree decl)
3893 {
3894 /* Naked functions are implemented entirely in assembly, including the
3895 return sequence, so suppress warnings about this. */
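/* Illustrative sketch (not from this file): for
__attribute__ ((naked)) int f (void) { __asm__ ("mov r0, #0\n\tbx lr"); }
the return sequence lives in the asm, so warning about the missing C-level
return statement would be spurious. */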
3896 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3897 }
3898
3899 \f
3900 /* Output assembler code for a block containing the constant parts
3901 of a trampoline, leaving space for the variable parts.
3902
3903 On the ARM, (if r8 is the static chain regnum, and remembering that
3904 referencing pc adds an offset of 8) the trampoline looks like:
3905 ldr r8, [pc, #0]
3906 ldr pc, [pc]
3907 .word static chain value
3908 .word function's address
3909 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3910
3911 static void
3912 arm_asm_trampoline_template (FILE *f)
3913 {
3914 fprintf (f, "\t.syntax unified\n");
3915
3916 if (TARGET_ARM)
3917 {
3918 fprintf (f, "\t.arm\n");
3919 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3920 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3921 }
3922 else if (TARGET_THUMB2)
3923 {
3924 fprintf (f, "\t.thumb\n");
3925 /* The Thumb-2 trampoline is similar to the arm implementation.
3926 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3927 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3928 STATIC_CHAIN_REGNUM, PC_REGNUM);
3929 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3930 }
3931 else
3932 {
3933 ASM_OUTPUT_ALIGN (f, 2);
3934 fprintf (f, "\t.code\t16\n");
3935 fprintf (f, ".Ltrampoline_start:\n");
3936 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3937 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3938 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3939 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3940 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3941 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3942 }
3943 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3944 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3945 }
3946
3947 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3948
3949 static void
3950 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3951 {
3952 rtx fnaddr, mem, a_tramp;
3953
3954 emit_block_move (m_tramp, assemble_trampoline_template (),
3955 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3956
3957 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3958 emit_move_insn (mem, chain_value);
3959
3960 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3961 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962 emit_move_insn (mem, fnaddr);
3963
3964 a_tramp = XEXP (m_tramp, 0);
3965 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3966 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3967 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3968 }
3969
3970 /* Thumb trampolines should be entered in thumb mode, so set
3971 the bottom bit of the address. */
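/* For example (illustrative), a trampoline at 0x20001000 is returned as
0x20001001, so an indirect BX/BLX to it enters Thumb state. */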
3972
3973 static rtx
3974 arm_trampoline_adjust_address (rtx addr)
3975 {
3976 if (TARGET_THUMB)
3977 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3978 NULL, 0, OPTAB_LIB_WIDEN);
3979 return addr;
3980 }
3981 \f
3982 /* Return 1 if it is possible to return using a single instruction.
3983 If SIBLING is non-null, this is a test for a return before a sibling
3984 call. SIBLING is the call insn, so we can examine its register usage. */
3985
3986 int
3987 use_return_insn (int iscond, rtx sibling)
3988 {
3989 int regno;
3990 unsigned int func_type;
3991 unsigned long saved_int_regs;
3992 unsigned HOST_WIDE_INT stack_adjust;
3993 arm_stack_offsets *offsets;
3994
3995 /* Never use a return instruction before reload has run. */
3996 if (!reload_completed)
3997 return 0;
3998
3999 func_type = arm_current_func_type ();
4000
4001 /* Naked, volatile and stack alignment functions need special
4002 consideration. */
4003 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4004 return 0;
4005
4006 /* So do interrupt functions that use the frame pointer and Thumb
4007 interrupt functions. */
4008 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4009 return 0;
4010
4011 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4012 && !optimize_function_for_size_p (cfun))
4013 return 0;
4014
4015 offsets = arm_get_frame_offsets ();
4016 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4017
4018 /* As do variadic functions. */
4019 if (crtl->args.pretend_args_size
4020 || cfun->machine->uses_anonymous_args
4021 /* Or if the function calls __builtin_eh_return () */
4022 || crtl->calls_eh_return
4023 /* Or if the function calls alloca */
4024 || cfun->calls_alloca
4025 /* Or if there is a stack adjustment. However, if the stack pointer
4026 is saved on the stack, we can use a pre-incrementing stack load. */
4027 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4028 && stack_adjust == 4))
4029 /* Or if the static chain register was saved above the frame, under the
4030 assumption that the stack pointer isn't saved on the stack. */
4031 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4032 && arm_compute_static_chain_stack_bytes() != 0))
4033 return 0;
4034
4035 saved_int_regs = offsets->saved_regs_mask;
4036
4037 /* Unfortunately, the insn
4038
4039 ldmib sp, {..., sp, ...}
4040
4041 triggers a bug on most SA-110 based devices, such that the stack
4042 pointer won't be correctly restored if the instruction takes a
4043 page fault. We work around this problem by popping r3 along with
4044 the other registers, since that is never slower than executing
4045 another instruction.
4046
4047 We test for !arm_arch5t here, because code for any architecture
4048 less than this could potentially be run on one of the buggy
4049 chips. */
4050 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4051 {
4052 /* Validate that r3 is a call-clobbered register (always true in
4053 the default abi) ... */
4054 if (!call_used_regs[3])
4055 return 0;
4056
4057 /* ... that it isn't being used for a return value ... */
4058 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4059 return 0;
4060
4061 /* ... or for a tail-call argument ... */
4062 if (sibling)
4063 {
4064 gcc_assert (CALL_P (sibling));
4065
4066 if (find_regno_fusage (sibling, USE, 3))
4067 return 0;
4068 }
4069
4070 /* ... and that there are no call-saved registers in r0-r2
4071 (always true in the default ABI). */
4072 if (saved_int_regs & 0x7)
4073 return 0;
4074 }
4075
4076 /* Can't be done if interworking with Thumb, and any registers have been
4077 stacked. */
4078 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4079 return 0;
4080
4081 /* On StrongARM, conditional returns are expensive if they aren't
4082 taken and multiple registers have been stacked. */
4083 if (iscond && arm_tune_strongarm)
4084 {
4085 /* Conditional return when just the LR is stored is a simple
4086 conditional-load instruction, that's not expensive. */
4087 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4088 return 0;
4089
4090 if (flag_pic
4091 && arm_pic_register != INVALID_REGNUM
4092 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4093 return 0;
4094 }
4095
4096 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4097 several instructions if anything needs to be popped. */
4098 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4099 return 0;
4100
4101 /* If there are saved registers but the LR isn't saved, then we need
4102 two instructions for the return. */
4103 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4104 return 0;
4105
4106 /* Can't be done if any of the VFP regs are pushed,
4107 since this also requires an insn. */
4108 if (TARGET_HARD_FLOAT)
4109 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4110 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4111 return 0;
4112
4113 if (TARGET_REALLY_IWMMXT)
4114 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4115 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4116 return 0;
4117
4118 return 1;
4119 }
4120
4121 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4122 shrink-wrapping if possible. This is the case if we need to emit a
4123 prologue, which we can test by looking at the offsets. */
4124 bool
4125 use_simple_return_p (void)
4126 {
4127 arm_stack_offsets *offsets;
4128
4129 /* Note this function can be called before or after reload. */
4130 if (!reload_completed)
4131 arm_compute_frame_layout ();
4132
4133 offsets = arm_get_frame_offsets ();
4134 return offsets->outgoing_args != 0;
4135 }
4136
4137 /* Return TRUE if int I is a valid immediate ARM constant. */
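/* Some illustrative cases (not exhaustive): in ARM mode 0x000000ff,
0x0003fc00 and 0xff000000 are representable (an 8-bit value rotated right
by an even amount), while 0x00000fff and 0x01010101 are not and must be
synthesized. Thumb-2 additionally accepts the replicated patterns handled
below, e.g. 0x01010101 and 0xab00ab00. */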
4138
4139 int
4140 const_ok_for_arm (HOST_WIDE_INT i)
4141 {
4142 int lowbit;
4143
4144 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4145 be all zero, or all one. */
4146 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4147 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4148 != ((~(unsigned HOST_WIDE_INT) 0)
4149 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4150 return FALSE;
4151
4152 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4153
4154 /* Fast return for 0 and small values. We must do this for zero, since
4155 the code below can't handle that one case. */
4156 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4157 return TRUE;
4158
4159 /* Get the number of trailing zeros. */
4160 lowbit = ffs((int) i) - 1;
4161
4162 /* Only even shifts are allowed in ARM mode so round down to the
4163 nearest even number. */
4164 if (TARGET_ARM)
4165 lowbit &= ~1;
4166
4167 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4168 return TRUE;
4169
4170 if (TARGET_ARM)
4171 {
4172 /* Allow rotated constants in ARM mode. */
4173 if (lowbit <= 4
4174 && ((i & ~0xc000003f) == 0
4175 || (i & ~0xf000000f) == 0
4176 || (i & ~0xfc000003) == 0))
4177 return TRUE;
4178 }
4179 else if (TARGET_THUMB2)
4180 {
4181 HOST_WIDE_INT v;
4182
4183 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4184 v = i & 0xff;
4185 v |= v << 16;
4186 if (i == v || i == (v | (v << 8)))
4187 return TRUE;
4188
4189 /* Allow repeated pattern 0xXY00XY00. */
4190 v = i & 0xff00;
4191 v |= v << 16;
4192 if (i == v)
4193 return TRUE;
4194 }
4195 else if (TARGET_HAVE_MOVT)
4196 {
4197 /* Thumb-1 targets with MOVT. */
4198 if (i > 0xffff)
4199 return FALSE;
4200 else
4201 return TRUE;
4202 }
4203
4204 return FALSE;
4205 }
4206
4207 /* Return true if I is a valid constant for the operation CODE. */
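/* Illustrative examples (not exhaustive): for AND, 0xffffff00 is accepted
because ~0xffffff00 == 0xff is a valid immediate (realized as BIC); for
PLUS, -5 is accepted because 5 is (realized as SUB); for SET on targets
with MOVT, 0x1234 is accepted via MOVW. */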
4208 int
4209 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4210 {
4211 if (const_ok_for_arm (i))
4212 return 1;
4213
4214 switch (code)
4215 {
4216 case SET:
4217 /* See if we can use movw. */
4218 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4219 return 1;
4220 else
4221 /* Otherwise, try mvn. */
4222 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4223
4224 case PLUS:
4225 /* See if we can use addw or subw. */
4226 if (TARGET_THUMB2
4227 && ((i & 0xfffff000) == 0
4228 || ((-i) & 0xfffff000) == 0))
4229 return 1;
4230 /* Fall through. */
4231 case COMPARE:
4232 case EQ:
4233 case NE:
4234 case GT:
4235 case LE:
4236 case LT:
4237 case GE:
4238 case GEU:
4239 case LTU:
4240 case GTU:
4241 case LEU:
4242 case UNORDERED:
4243 case ORDERED:
4244 case UNEQ:
4245 case UNGE:
4246 case UNLT:
4247 case UNGT:
4248 case UNLE:
4249 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4250
4251 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4252 case XOR:
4253 return 0;
4254
4255 case IOR:
4256 if (TARGET_THUMB2)
4257 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4258 return 0;
4259
4260 case AND:
4261 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4262
4263 default:
4264 gcc_unreachable ();
4265 }
4266 }
4267
4268 /* Return true if I is a valid di mode constant for the operation CODE. */
4269 int
4270 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4271 {
4272 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4273 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4274 rtx hi = GEN_INT (hi_val);
4275 rtx lo = GEN_INT (lo_val);
4276
4277 if (TARGET_THUMB1)
4278 return 0;
4279
4280 switch (code)
4281 {
4282 case AND:
4283 case IOR:
4284 case XOR:
4285 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4286 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4287 case PLUS:
4288 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4289
4290 default:
4291 return 0;
4292 }
4293 }
4294
4295 /* Emit a sequence of insns to handle a large constant.
4296 CODE is the code of the operation required, it can be any of SET, PLUS,
4297 IOR, AND, XOR, MINUS;
4298 MODE is the mode in which the operation is being performed;
4299 VAL is the integer to operate on;
4300 SOURCE is the other operand (a register, or a null-pointer for SET);
4301 SUBTARGETS means it is safe to create scratch registers if that will
4302 either produce a simpler sequence, or we will want to cse the values.
4303 Return value is the number of insns emitted. */
4304
4305 /* ??? Tweak this for thumb2. */
4306 int
4307 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4308 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4309 {
4310 rtx cond;
4311
4312 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4313 cond = COND_EXEC_TEST (PATTERN (insn));
4314 else
4315 cond = NULL_RTX;
4316
4317 if (subtargets || code == SET
4318 || (REG_P (target) && REG_P (source)
4319 && REGNO (target) != REGNO (source)))
4320 {
4321 /* After arm_reorg has been called, we can't fix up expensive
4322 constants by pushing them into memory so we must synthesize
4323 them in-line, regardless of the cost. This is only likely to
4324 be more costly on chips that have load delay slots and we are
4325 compiling without running the scheduler (so no splitting
4326 occurred before the final instruction emission).
4327
4328 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4329 */
4330 if (!cfun->machine->after_arm_reorg
4331 && !cond
4332 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4333 1, 0)
4334 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4335 + (code != SET))))
4336 {
4337 if (code == SET)
4338 {
4339 /* Currently SET is the only monadic value for CODE, all
4340 the rest are dyadic. */
4341 if (TARGET_USE_MOVT)
4342 arm_emit_movpair (target, GEN_INT (val));
4343 else
4344 emit_set_insn (target, GEN_INT (val));
4345
4346 return 1;
4347 }
4348 else
4349 {
4350 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4351
4352 if (TARGET_USE_MOVT)
4353 arm_emit_movpair (temp, GEN_INT (val));
4354 else
4355 emit_set_insn (temp, GEN_INT (val));
4356
4357 /* For MINUS, the value is what we subtract from, since we never
4358 have subtraction of a constant. */
4359 if (code == MINUS)
4360 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4361 else
4362 emit_set_insn (target,
4363 gen_rtx_fmt_ee (code, mode, source, temp));
4364 return 2;
4365 }
4366 }
4367 }
4368
4369 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4370 1);
4371 }
4372
4373 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4374 ARM/THUMB2 immediates and add up to VAL.
4375 The function return value gives the number of insns required. */
4376 static int
4377 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4378 struct four_ints *return_sequence)
4379 {
4380 int best_consecutive_zeros = 0;
4381 int i;
4382 int best_start = 0;
4383 int insns1, insns2;
4384 struct four_ints tmp_sequence;
4385
4386 /* If we aren't targeting ARM, the best place to start is always at
4387 the bottom, otherwise look more closely. */
4388 if (TARGET_ARM)
4389 {
4390 for (i = 0; i < 32; i += 2)
4391 {
4392 int consecutive_zeros = 0;
4393
4394 if (!(val & (3 << i)))
4395 {
4396 while ((i < 32) && !(val & (3 << i)))
4397 {
4398 consecutive_zeros += 2;
4399 i += 2;
4400 }
4401 if (consecutive_zeros > best_consecutive_zeros)
4402 {
4403 best_consecutive_zeros = consecutive_zeros;
4404 best_start = i - consecutive_zeros;
4405 }
4406 i -= 2;
4407 }
4408 }
4409 }
4410
4411 /* So long as it won't require any more insns to do so, it's
4412 desirable to emit a small constant (in bits 0...9) in the last
4413 insn. This way there is more chance that it can be combined with
4414 a later addressing insn to form a pre-indexed load or store
4415 operation. Consider:
4416
4417 *((volatile int *)0xe0000100) = 1;
4418 *((volatile int *)0xe0000110) = 2;
4419
4420 We want this to wind up as:
4421
4422 mov rA, #0xe0000000
4423 mov rB, #1
4424 str rB, [rA, #0x100]
4425 mov rB, #2
4426 str rB, [rA, #0x110]
4427
4428 rather than having to synthesize both large constants from scratch.
4429
4430 Therefore, we calculate how many insns would be required to emit
4431 the constant starting from `best_start', and also starting from
4432 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4433 yield a shorter sequence, we may as well use zero. */
4434 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4435 if (best_start != 0
4436 && ((HOST_WIDE_INT_1U << best_start) < val))
4437 {
4438 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4439 if (insns2 <= insns1)
4440 {
4441 *return_sequence = tmp_sequence;
4442 insns1 = insns2;
4443 }
4444 }
4445
4446 return insns1;
4447 }
4448
4449 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4450 static int
4451 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4452 struct four_ints *return_sequence, int i)
4453 {
4454 int remainder = val & 0xffffffff;
4455 int insns = 0;
4456
4457 /* Try and find a way of doing the job in either two or three
4458 instructions.
4459
4460 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4461 location. We start at position I. This may be the MSB, or
4462 optimal_immediate_sequence may have positioned it at the largest block
4463 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4464 wrapping around to the top of the word when we drop off the bottom.
4465 In the worst case this code should produce no more than four insns.
4466
4467 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4468 constants, shifted to any arbitrary location. We should always start
4469 at the MSB. */
4470 do
4471 {
4472 int end;
4473 unsigned int b1, b2, b3, b4;
4474 unsigned HOST_WIDE_INT result;
4475 int loc;
4476
4477 gcc_assert (insns < 4);
4478
4479 if (i <= 0)
4480 i += 32;
4481
4482 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4483 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4484 {
4485 loc = i;
4486 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4487 /* We can use addw/subw for the last 12 bits. */
4488 result = remainder;
4489 else
4490 {
4491 /* Use an 8-bit shifted/rotated immediate. */
4492 end = i - 8;
4493 if (end < 0)
4494 end += 32;
4495 result = remainder & ((0x0ff << end)
4496 | ((i < end) ? (0xff >> (32 - end))
4497 : 0));
4498 i -= 8;
4499 }
4500 }
4501 else
4502 {
4503 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4504 arbitrary shifts. */
4505 i -= TARGET_ARM ? 2 : 1;
4506 continue;
4507 }
4508
4509 /* Next, see if we can do a better job with a thumb2 replicated
4510 constant.
4511
4512 We do it this way around to catch the cases like 0x01F001E0 where
4513 two 8-bit immediates would work, but a replicated constant would
4514 make it worse.
4515
4516 TODO: 16-bit constants that don't clear all the bits, but still win.
4517 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4518 if (TARGET_THUMB2)
4519 {
4520 b1 = (remainder & 0xff000000) >> 24;
4521 b2 = (remainder & 0x00ff0000) >> 16;
4522 b3 = (remainder & 0x0000ff00) >> 8;
4523 b4 = remainder & 0xff;
4524
4525 if (loc > 24)
4526 {
4527 /* The 8-bit immediate already found clears b1 (and maybe b2),
4528 but must leave b3 and b4 alone. */
4529
4530 /* First try to find a 32-bit replicated constant that clears
4531 almost everything. We can assume that we can't do it in one,
4532 or else we wouldn't be here. */
4533 unsigned int tmp = b1 & b2 & b3 & b4;
4534 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4535 + (tmp << 24);
4536 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4537 + (tmp == b3) + (tmp == b4);
4538 if (tmp
4539 && (matching_bytes >= 3
4540 || (matching_bytes == 2
4541 && const_ok_for_op (remainder & ~tmp2, code))))
4542 {
4543 /* At least 3 of the bytes match, and the fourth has at
4544 least as many bits set, or two of the bytes match
4545 and it will only require one more insn to finish. */
4546 result = tmp2;
4547 i = tmp != b1 ? 32
4548 : tmp != b2 ? 24
4549 : tmp != b3 ? 16
4550 : 8;
4551 }
4552
4553 /* Second, try to find a 16-bit replicated constant that can
4554 leave three of the bytes clear. If b2 or b4 is already
4555 zero, then we can. If the 8-bit from above would not
4556 clear b2 anyway, then we still win. */
4557 else if (b1 == b3 && (!b2 || !b4
4558 || (remainder & 0x00ff0000 & ~result)))
4559 {
4560 result = remainder & 0xff00ff00;
4561 i = 24;
4562 }
4563 }
4564 else if (loc > 16)
4565 {
4566 /* The 8-bit immediate already found clears b2 (and maybe b3)
4567 and we don't get here unless b1 is already clear, but it will
4568 leave b4 unchanged. */
4569
4570 /* If we can clear b2 and b4 at once, then we win, since the
4571 8-bits couldn't possibly reach that far. */
4572 if (b2 == b4)
4573 {
4574 result = remainder & 0x00ff00ff;
4575 i = 16;
4576 }
4577 }
4578 }
4579
4580 return_sequence->i[insns++] = result;
4581 remainder &= ~result;
4582
4583 if (code == SET || code == MINUS)
4584 code = PLUS;
4585 }
4586 while (remainder);
4587
4588 return insns;
4589 }
4590
4591 /* Emit an instruction with the indicated PATTERN. If COND is
4592 non-NULL, conditionalize the execution of the instruction on COND
4593 being true. */
4594
4595 static void
4596 emit_constant_insn (rtx cond, rtx pattern)
4597 {
4598 if (cond)
4599 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4600 emit_insn (pattern);
4601 }
4602
4603 /* As above, but extra parameter GENERATE which, if clear, suppresses
4604 RTL generation. */
4605
4606 static int
4607 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4608 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4609 int subtargets, int generate)
4610 {
4611 int can_invert = 0;
4612 int can_negate = 0;
4613 int final_invert = 0;
4614 int i;
4615 int set_sign_bit_copies = 0;
4616 int clear_sign_bit_copies = 0;
4617 int clear_zero_bit_copies = 0;
4618 int set_zero_bit_copies = 0;
4619 int insns = 0, neg_insns, inv_insns;
4620 unsigned HOST_WIDE_INT temp1, temp2;
4621 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4622 struct four_ints *immediates;
4623 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4624
4625 /* Find out which operations are safe for a given CODE. Also do a quick
4626 check for degenerate cases; these can occur when DImode operations
4627 are split. */
4628 switch (code)
4629 {
4630 case SET:
4631 can_invert = 1;
4632 break;
4633
4634 case PLUS:
4635 can_negate = 1;
4636 break;
4637
4638 case IOR:
4639 if (remainder == 0xffffffff)
4640 {
4641 if (generate)
4642 emit_constant_insn (cond,
4643 gen_rtx_SET (target,
4644 GEN_INT (ARM_SIGN_EXTEND (val))));
4645 return 1;
4646 }
4647
4648 if (remainder == 0)
4649 {
4650 if (reload_completed && rtx_equal_p (target, source))
4651 return 0;
4652
4653 if (generate)
4654 emit_constant_insn (cond, gen_rtx_SET (target, source));
4655 return 1;
4656 }
4657 break;
4658
4659 case AND:
4660 if (remainder == 0)
4661 {
4662 if (generate)
4663 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4664 return 1;
4665 }
4666 if (remainder == 0xffffffff)
4667 {
4668 if (reload_completed && rtx_equal_p (target, source))
4669 return 0;
4670 if (generate)
4671 emit_constant_insn (cond, gen_rtx_SET (target, source));
4672 return 1;
4673 }
4674 can_invert = 1;
4675 break;
4676
4677 case XOR:
4678 if (remainder == 0)
4679 {
4680 if (reload_completed && rtx_equal_p (target, source))
4681 return 0;
4682 if (generate)
4683 emit_constant_insn (cond, gen_rtx_SET (target, source));
4684 return 1;
4685 }
4686
4687 if (remainder == 0xffffffff)
4688 {
4689 if (generate)
4690 emit_constant_insn (cond,
4691 gen_rtx_SET (target,
4692 gen_rtx_NOT (mode, source)));
4693 return 1;
4694 }
4695 final_invert = 1;
4696 break;
4697
4698 case MINUS:
4699 /* We treat MINUS as (val - source), since (source - val) is always
4700 passed as (source + (-val)). */
4701 if (remainder == 0)
4702 {
4703 if (generate)
4704 emit_constant_insn (cond,
4705 gen_rtx_SET (target,
4706 gen_rtx_NEG (mode, source)));
4707 return 1;
4708 }
4709 if (const_ok_for_arm (val))
4710 {
4711 if (generate)
4712 emit_constant_insn (cond,
4713 gen_rtx_SET (target,
4714 gen_rtx_MINUS (mode, GEN_INT (val),
4715 source)));
4716 return 1;
4717 }
4718
4719 break;
4720
4721 default:
4722 gcc_unreachable ();
4723 }
4724
4725 /* If we can do it in one insn get out quickly. */
4726 if (const_ok_for_op (val, code))
4727 {
4728 if (generate)
4729 emit_constant_insn (cond,
4730 gen_rtx_SET (target,
4731 (source
4732 ? gen_rtx_fmt_ee (code, mode, source,
4733 GEN_INT (val))
4734 : GEN_INT (val))));
4735 return 1;
4736 }
4737
4738 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4739 insn. */
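/* E.g. (illustrative) x &= 0xffff becomes a single UXTH, and x &= 0x3ff
becomes a single "ubfx rD, rS, #0, #10" on Thumb-2 capable cores. */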
4740 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4741 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4742 {
4743 if (generate)
4744 {
4745 if (mode == SImode && i == 16)
4746 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4747 smaller insn. */
4748 emit_constant_insn (cond,
4749 gen_zero_extendhisi2
4750 (target, gen_lowpart (HImode, source)));
4751 else
4752 /* Extz only supports SImode, but we can coerce the operands
4753 into that mode. */
4754 emit_constant_insn (cond,
4755 gen_extzv_t2 (gen_lowpart (SImode, target),
4756 gen_lowpart (SImode, source),
4757 GEN_INT (i), const0_rtx));
4758 }
4759
4760 return 1;
4761 }
4762
4763 /* Calculate a few attributes that may be useful for specific
4764 optimizations. */
4765 /* Count number of leading zeros. */
4766 for (i = 31; i >= 0; i--)
4767 {
4768 if ((remainder & (1 << i)) == 0)
4769 clear_sign_bit_copies++;
4770 else
4771 break;
4772 }
4773
4774 /* Count number of leading 1's. */
4775 for (i = 31; i >= 0; i--)
4776 {
4777 if ((remainder & (1 << i)) != 0)
4778 set_sign_bit_copies++;
4779 else
4780 break;
4781 }
4782
4783 /* Count number of trailing zeros. */
4784 for (i = 0; i <= 31; i++)
4785 {
4786 if ((remainder & (1 << i)) == 0)
4787 clear_zero_bit_copies++;
4788 else
4789 break;
4790 }
4791
4792 /* Count number of trailing 1's. */
4793 for (i = 0; i <= 31; i++)
4794 {
4795 if ((remainder & (1 << i)) != 0)
4796 set_zero_bit_copies++;
4797 else
4798 break;
4799 }
4800
4801 switch (code)
4802 {
4803 case SET:
4804 /* See if we can do this by sign_extending a constant that is known
4805 to be negative. This is a good way of doing it, since the shift
4806 may well merge into a subsequent insn. */
4807 if (set_sign_bit_copies > 1)
4808 {
4809 if (const_ok_for_arm
4810 (temp1 = ARM_SIGN_EXTEND (remainder
4811 << (set_sign_bit_copies - 1))))
4812 {
4813 if (generate)
4814 {
4815 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4816 emit_constant_insn (cond,
4817 gen_rtx_SET (new_src, GEN_INT (temp1)));
4818 emit_constant_insn (cond,
4819 gen_ashrsi3 (target, new_src,
4820 GEN_INT (set_sign_bit_copies - 1)));
4821 }
4822 return 2;
4823 }
4824 /* For an inverted constant, we will need to set the low bits,
4825 these will be shifted out of harm's way. */
4826 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4827 if (const_ok_for_arm (~temp1))
4828 {
4829 if (generate)
4830 {
4831 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4832 emit_constant_insn (cond,
4833 gen_rtx_SET (new_src, GEN_INT (temp1)));
4834 emit_constant_insn (cond,
4835 gen_ashrsi3 (target, new_src,
4836 GEN_INT (set_sign_bit_copies - 1)));
4837 }
4838 return 2;
4839 }
4840 }
4841
4842 /* See if we can calculate the value as the difference between two
4843 valid immediates. */
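/* E.g. (illustrative) 0xfff is not a valid immediate but can be built in
two insns as 0x1000 - 1:
mov rD, #4096
sub rD, rD, #1 */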
4844 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4845 {
4846 int topshift = clear_sign_bit_copies & ~1;
4847
4848 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4849 & (0xff000000 >> topshift));
4850
4851 /* If temp1 is zero, then that means the 9 most significant
4852 bits of remainder were 1 and we've caused it to overflow.
4853 When topshift is 0 we don't need to do anything since we
4854 can borrow from 'bit 32'. */
4855 if (temp1 == 0 && topshift != 0)
4856 temp1 = 0x80000000 >> (topshift - 1);
4857
4858 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4859
4860 if (const_ok_for_arm (temp2))
4861 {
4862 if (generate)
4863 {
4864 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4865 emit_constant_insn (cond,
4866 gen_rtx_SET (new_src, GEN_INT (temp1)));
4867 emit_constant_insn (cond,
4868 gen_addsi3 (target, new_src,
4869 GEN_INT (-temp2)));
4870 }
4871
4872 return 2;
4873 }
4874 }
4875
4876 /* See if we can generate this by setting the bottom (or the top)
4877 16 bits, and then shifting these into the other half of the
4878 word. We only look for the simplest cases, to do more would cost
4879 too much. Be careful, however, not to generate this when the
4880 alternative would take fewer insns. */
4881 if (val & 0xffff0000)
4882 {
4883 temp1 = remainder & 0xffff0000;
4884 temp2 = remainder & 0x0000ffff;
4885
4886 /* Overlaps outside this range are best done using other methods. */
4887 for (i = 9; i < 24; i++)
4888 {
4889 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4890 && !const_ok_for_arm (temp2))
4891 {
4892 rtx new_src = (subtargets
4893 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4894 : target);
4895 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4896 source, subtargets, generate);
4897 source = new_src;
4898 if (generate)
4899 emit_constant_insn
4900 (cond,
4901 gen_rtx_SET
4902 (target,
4903 gen_rtx_IOR (mode,
4904 gen_rtx_ASHIFT (mode, source,
4905 GEN_INT (i)),
4906 source)));
4907 return insns + 1;
4908 }
4909 }
4910
4911 /* Don't duplicate cases already considered. */
4912 for (i = 17; i < 24; i++)
4913 {
4914 if (((temp1 | (temp1 >> i)) == remainder)
4915 && !const_ok_for_arm (temp1))
4916 {
4917 rtx new_src = (subtargets
4918 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4919 : target);
4920 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4921 source, subtargets, generate);
4922 source = new_src;
4923 if (generate)
4924 emit_constant_insn
4925 (cond,
4926 gen_rtx_SET (target,
4927 gen_rtx_IOR
4928 (mode,
4929 gen_rtx_LSHIFTRT (mode, source,
4930 GEN_INT (i)),
4931 source)));
4932 return insns + 1;
4933 }
4934 }
4935 }
4936 break;
4937
4938 case IOR:
4939 case XOR:
4940 /* If we have IOR or XOR, and the constant can be loaded in a
4941 single instruction, and we can find a temporary to put it in,
4942 then this can be done in two instructions instead of 3-4. */
4943 if (subtargets
4944 /* TARGET can't be NULL if SUBTARGETS is 0 */
4945 || (reload_completed && !reg_mentioned_p (target, source)))
4946 {
4947 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4948 {
4949 if (generate)
4950 {
4951 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4952
4953 emit_constant_insn (cond,
4954 gen_rtx_SET (sub, GEN_INT (val)));
4955 emit_constant_insn (cond,
4956 gen_rtx_SET (target,
4957 gen_rtx_fmt_ee (code, mode,
4958 source, sub)));
4959 }
4960 return 2;
4961 }
4962 }
4963
4964 if (code == XOR)
4965 break;
4966
4967 /* Convert
4968 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4969 followed by 0s, e.g. 0xfff00000) into
4970 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4971
4972 This can be done in 2 instructions by using shifts with mov or mvn.
4973 e.g. for
4974 x = x | 0xfff00000;
4975 we generate:
4976 mvn r0, r0, asl #12
4977 mvn r0, r0, lsr #12 */
4978 if (set_sign_bit_copies > 8
4979 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4980 {
4981 if (generate)
4982 {
4983 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4984 rtx shift = GEN_INT (set_sign_bit_copies);
4985
4986 emit_constant_insn
4987 (cond,
4988 gen_rtx_SET (sub,
4989 gen_rtx_NOT (mode,
4990 gen_rtx_ASHIFT (mode,
4991 source,
4992 shift))));
4993 emit_constant_insn
4994 (cond,
4995 gen_rtx_SET (target,
4996 gen_rtx_NOT (mode,
4997 gen_rtx_LSHIFTRT (mode, sub,
4998 shift))));
4999 }
5000 return 2;
5001 }
5002
5003 /* Convert
5004 x = y | constant (which has set_zero_bit_copies trailing 1s)
5005 to
5006 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5007
5008 E.g. for r0 = r0 | 0xfff
5009 we generate:
5010 mvn r0, r0, lsr #12
5011 mvn r0, r0, asl #12
5012 */
5013 if (set_zero_bit_copies > 8
5014 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5015 {
5016 if (generate)
5017 {
5018 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5019 rtx shift = GEN_INT (set_zero_bit_copies);
5020
5021 emit_constant_insn
5022 (cond,
5023 gen_rtx_SET (sub,
5024 gen_rtx_NOT (mode,
5025 gen_rtx_LSHIFTRT (mode,
5026 source,
5027 shift))));
5028 emit_constant_insn
5029 (cond,
5030 gen_rtx_SET (target,
5031 gen_rtx_NOT (mode,
5032 gen_rtx_ASHIFT (mode, sub,
5033 shift))));
5034 }
5035 return 2;
5036 }
5037
5038 /* This will never be reached for Thumb2 because orn is a valid
5039 instruction. This is for Thumb1 and the ARM 32 bit cases.
5040
5041 x = y | constant (such that ~constant is a valid constant)
5042 Transform this to
5043 x = ~(~y & ~constant).
5044 */
5045 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5046 {
5047 if (generate)
5048 {
5049 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5050 emit_constant_insn (cond,
5051 gen_rtx_SET (sub,
5052 gen_rtx_NOT (mode, source)));
5053 source = sub;
5054 if (subtargets)
5055 sub = gen_reg_rtx (mode);
5056 emit_constant_insn (cond,
5057 gen_rtx_SET (sub,
5058 gen_rtx_AND (mode, source,
5059 GEN_INT (temp1))));
5060 emit_constant_insn (cond,
5061 gen_rtx_SET (target,
5062 gen_rtx_NOT (mode, sub)));
5063 }
5064 return 3;
5065 }
5066 break;
5067
5068 case AND:
5069 /* See if two shifts will do 2 or more insn's worth of work. */
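/* Illustrative example: for x &= 0x0000ffff neither the constant nor its
   complement is a valid immediate, but clear_sign_bit_copies == 16, so
   mov rT, source, lsl #16
   mov target, rT, lsr #16
   clears the upper halfword in two instructions. */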
5070 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5071 {
5072 HOST_WIDE_INT shift_mask = ((0xffffffff
5073 << (32 - clear_sign_bit_copies))
5074 & 0xffffffff);
5075
5076 if ((remainder | shift_mask) != 0xffffffff)
5077 {
5078 HOST_WIDE_INT new_val
5079 = ARM_SIGN_EXTEND (remainder | shift_mask);
5080
5081 if (generate)
5082 {
5083 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5084 insns = arm_gen_constant (AND, SImode, cond, new_val,
5085 new_src, source, subtargets, 1);
5086 source = new_src;
5087 }
5088 else
5089 {
5090 rtx targ = subtargets ? NULL_RTX : target;
5091 insns = arm_gen_constant (AND, mode, cond, new_val,
5092 targ, source, subtargets, 0);
5093 }
5094 }
5095
5096 if (generate)
5097 {
5098 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5099 rtx shift = GEN_INT (clear_sign_bit_copies);
5100
5101 emit_insn (gen_ashlsi3 (new_src, source, shift));
5102 emit_insn (gen_lshrsi3 (target, new_src, shift));
5103 }
5104
5105 return insns + 2;
5106 }
5107
5108 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5109 {
5110 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5111
5112 if ((remainder | shift_mask) != 0xffffffff)
5113 {
5114 HOST_WIDE_INT new_val
5115 = ARM_SIGN_EXTEND (remainder | shift_mask);
5116 if (generate)
5117 {
5118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5119
5120 insns = arm_gen_constant (AND, mode, cond, new_val,
5121 new_src, source, subtargets, 1);
5122 source = new_src;
5123 }
5124 else
5125 {
5126 rtx targ = subtargets ? NULL_RTX : target;
5127
5128 insns = arm_gen_constant (AND, mode, cond, new_val,
5129 targ, source, subtargets, 0);
5130 }
5131 }
5132
5133 if (generate)
5134 {
5135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5136 rtx shift = GEN_INT (clear_zero_bit_copies);
5137
5138 emit_insn (gen_lshrsi3 (new_src, source, shift));
5139 emit_insn (gen_ashlsi3 (target, new_src, shift));
5140 }
5141
5142 return insns + 2;
5143 }
5144
5145 break;
5146
5147 default:
5148 break;
5149 }
5150
5151 /* Calculate what the instruction sequences would be if we generated it
5152 normally, negated, or inverted. */
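/* Illustrative example: for x &= 0xfffff0f0 the positive form is unusable
   (insns == 99), but the inverted value 0x00000f0f splits into the valid
   immediates 0x0f and 0xf00, so the loop below ends up emitting two BIC
   instructions. */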
5153 if (code == AND)
5154 /* AND cannot be split into multiple insns, so invert and use BIC. */
5155 insns = 99;
5156 else
5157 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5158
5159 if (can_negate)
5160 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5161 &neg_immediates);
5162 else
5163 neg_insns = 99;
5164
5165 if (can_invert || final_invert)
5166 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5167 &inv_immediates);
5168 else
5169 inv_insns = 99;
5170
5171 immediates = &pos_immediates;
5172
5173 /* Is the negated immediate sequence more efficient? */
5174 if (neg_insns < insns && neg_insns <= inv_insns)
5175 {
5176 insns = neg_insns;
5177 immediates = &neg_immediates;
5178 }
5179 else
5180 can_negate = 0;
5181
5182 /* Is the inverted immediate sequence more efficient?
5183 We must allow for an extra NOT instruction for XOR operations, although
5184 there is some chance that the final 'mvn' will get optimized later. */
5185 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5186 {
5187 insns = inv_insns;
5188 immediates = &inv_immediates;
5189 }
5190 else
5191 {
5192 can_invert = 0;
5193 final_invert = 0;
5194 }
5195
5196 /* Now output the chosen sequence as instructions. */
5197 if (generate)
5198 {
5199 for (i = 0; i < insns; i++)
5200 {
5201 rtx new_src, temp1_rtx;
5202
5203 temp1 = immediates->i[i];
5204
5205 if (code == SET || code == MINUS)
5206 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5207 else if ((final_invert || i < (insns - 1)) && subtargets)
5208 new_src = gen_reg_rtx (mode);
5209 else
5210 new_src = target;
5211
5212 if (can_invert)
5213 temp1 = ~temp1;
5214 else if (can_negate)
5215 temp1 = -temp1;
5216
5217 temp1 = trunc_int_for_mode (temp1, mode);
5218 temp1_rtx = GEN_INT (temp1);
5219
5220 if (code == SET)
5221 ;
5222 else if (code == MINUS)
5223 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5224 else
5225 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5226
5227 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5228 source = new_src;
5229
5230 if (code == SET)
5231 {
5232 can_negate = can_invert;
5233 can_invert = 0;
5234 code = PLUS;
5235 }
5236 else if (code == MINUS)
5237 code = PLUS;
5238 }
5239 }
5240
5241 if (final_invert)
5242 {
5243 if (generate)
5244 emit_constant_insn (cond, gen_rtx_SET (target,
5245 gen_rtx_NOT (mode, source)));
5246 insns++;
5247 }
5248
5249 return insns;
5250 }
5251
5252 /* Canonicalize a comparison so that we are more likely to recognize it.
5253 This can be done for a few constant compares, where we can make the
5254 immediate value easier to load. */
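/* Illustrative example: for (x > 0x1fff) neither 0x1fff nor -0x1fff is a
   valid immediate, but 0x2000 is, so the comparison is rewritten as
   (x >= 0x2000) and needs only
   cmp x, #8192
   to set the condition codes. */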
5255
5256 static void
5257 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5258 bool op0_preserve_value)
5259 {
5260 machine_mode mode;
5261 unsigned HOST_WIDE_INT i, maxval;
5262
5263 mode = GET_MODE (*op0);
5264 if (mode == VOIDmode)
5265 mode = GET_MODE (*op1);
5266
5267 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5268
5269 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5270 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5271 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5272 for GTU/LEU in Thumb mode. */
5273 if (mode == DImode)
5274 {
5275
5276 if (*code == GT || *code == LE
5277 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5278 {
5279 /* Missing comparison. First try to use an available
5280 comparison. */
5281 if (CONST_INT_P (*op1))
5282 {
5283 i = INTVAL (*op1);
5284 switch (*code)
5285 {
5286 case GT:
5287 case LE:
5288 if (i != maxval
5289 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5290 {
5291 *op1 = GEN_INT (i + 1);
5292 *code = *code == GT ? GE : LT;
5293 return;
5294 }
5295 break;
5296 case GTU:
5297 case LEU:
5298 if (i != ~((unsigned HOST_WIDE_INT) 0)
5299 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5300 {
5301 *op1 = GEN_INT (i + 1);
5302 *code = *code == GTU ? GEU : LTU;
5303 return;
5304 }
5305 break;
5306 default:
5307 gcc_unreachable ();
5308 }
5309 }
5310
5311 /* If that did not work, reverse the condition. */
5312 if (!op0_preserve_value)
5313 {
5314 std::swap (*op0, *op1);
5315 *code = (int)swap_condition ((enum rtx_code)*code);
5316 }
5317 }
5318 return;
5319 }
5320
5321 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5322 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5323 to facilitate possible combining with a cmp into 'ands'. */
5324 if (mode == SImode
5325 && GET_CODE (*op0) == ZERO_EXTEND
5326 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5327 && GET_MODE (XEXP (*op0, 0)) == QImode
5328 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5329 && subreg_lowpart_p (XEXP (*op0, 0))
5330 && *op1 == const0_rtx)
5331 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5332 GEN_INT (255));
5333
5334 /* Comparisons smaller than DImode. Only adjust comparisons against
5335 an out-of-range constant. */
5336 if (!CONST_INT_P (*op1)
5337 || const_ok_for_arm (INTVAL (*op1))
5338 || const_ok_for_arm (- INTVAL (*op1)))
5339 return;
5340
5341 i = INTVAL (*op1);
5342
5343 switch (*code)
5344 {
5345 case EQ:
5346 case NE:
5347 return;
5348
5349 case GT:
5350 case LE:
5351 if (i != maxval
5352 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5353 {
5354 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5355 *code = *code == GT ? GE : LT;
5356 return;
5357 }
5358 break;
5359
5360 case GE:
5361 case LT:
5362 if (i != ~maxval
5363 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5364 {
5365 *op1 = GEN_INT (i - 1);
5366 *code = *code == GE ? GT : LE;
5367 return;
5368 }
5369 break;
5370
5371 case GTU:
5372 case LEU:
5373 if (i != ~((unsigned HOST_WIDE_INT) 0)
5374 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5375 {
5376 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5377 *code = *code == GTU ? GEU : LTU;
5378 return;
5379 }
5380 break;
5381
5382 case GEU:
5383 case LTU:
5384 if (i != 0
5385 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5386 {
5387 *op1 = GEN_INT (i - 1);
5388 *code = *code == GEU ? GTU : LEU;
5389 return;
5390 }
5391 break;
5392
5393 default:
5394 gcc_unreachable ();
5395 }
5396 }
5397
5398
5399 /* Define how to find the value returned by a function. */
5400
5401 static rtx
5402 arm_function_value(const_tree type, const_tree func,
5403 bool outgoing ATTRIBUTE_UNUSED)
5404 {
5405 machine_mode mode;
5406 int unsignedp ATTRIBUTE_UNUSED;
5407 rtx r ATTRIBUTE_UNUSED;
5408
5409 mode = TYPE_MODE (type);
5410
5411 if (TARGET_AAPCS_BASED)
5412 return aapcs_allocate_return_reg (mode, type, func);
5413
5414 /* Promote integer types. */
5415 if (INTEGRAL_TYPE_P (type))
5416 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5417
5418 /* Promotes small structs returned in a register to full-word size
5419 for big-endian AAPCS. */
5420 if (arm_return_in_msb (type))
5421 {
5422 HOST_WIDE_INT size = int_size_in_bytes (type);
5423 if (size % UNITS_PER_WORD != 0)
5424 {
5425 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5426 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5427 }
5428 }
5429
5430 return arm_libcall_value_1 (mode);
5431 }
5432
5433 /* libcall hashtable helpers. */
5434
5435 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5436 {
5437 static inline hashval_t hash (const rtx_def *);
5438 static inline bool equal (const rtx_def *, const rtx_def *);
5439 static inline void remove (rtx_def *);
5440 };
5441
5442 inline bool
5443 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5444 {
5445 return rtx_equal_p (p1, p2);
5446 }
5447
5448 inline hashval_t
5449 libcall_hasher::hash (const rtx_def *p1)
5450 {
5451 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5452 }
5453
5454 typedef hash_table<libcall_hasher> libcall_table_type;
5455
5456 static void
5457 add_libcall (libcall_table_type *htab, rtx libcall)
5458 {
5459 *htab->find_slot (libcall, INSERT) = libcall;
5460 }
5461
5462 static bool
5463 arm_libcall_uses_aapcs_base (const_rtx libcall)
5464 {
5465 static bool init_done = false;
5466 static libcall_table_type *libcall_htab = NULL;
5467
5468 if (!init_done)
5469 {
5470 init_done = true;
5471
5472 libcall_htab = new libcall_table_type (31);
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5475 add_libcall (libcall_htab,
5476 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5477 add_libcall (libcall_htab,
5478 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5479 add_libcall (libcall_htab,
5480 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5481
5482 add_libcall (libcall_htab,
5483 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5490
5491 add_libcall (libcall_htab,
5492 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5505 add_libcall (libcall_htab,
5506 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5507
5508 /* Values from double-precision helper functions are returned in core
5509 registers if the selected core only supports single-precision
5510 arithmetic, even if we are using the hard-float ABI. The same is
5511 true for single-precision helpers, but we will never be using the
5512 hard-float ABI on a CPU which doesn't support single-precision
5513 operations in hardware. */
5514 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5515 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5516 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5517 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5518 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5519 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5520 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5521 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5522 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5523 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5524 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5525 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5526 SFmode));
5527 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5528 DFmode));
5529 add_libcall (libcall_htab,
5530 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5531 }
5532
5533 return libcall && libcall_htab->find (libcall) != NULL;
5534 }
5535
5536 static rtx
5537 arm_libcall_value_1 (machine_mode mode)
5538 {
5539 if (TARGET_AAPCS_BASED)
5540 return aapcs_libcall_value (mode);
5541 else if (TARGET_IWMMXT_ABI
5542 && arm_vector_mode_supported_p (mode))
5543 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5544 else
5545 return gen_rtx_REG (mode, ARG_REGISTER (1));
5546 }
5547
5548 /* Define how to find the value returned by a library function
5549 assuming the value has mode MODE. */
5550
5551 static rtx
5552 arm_libcall_value (machine_mode mode, const_rtx libcall)
5553 {
5554 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5555 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5556 {
5557 /* The following libcalls return their result in integer registers,
5558 even though they return a floating point value. */
5559 if (arm_libcall_uses_aapcs_base (libcall))
5560 return gen_rtx_REG (mode, ARG_REGISTER(1));
5561
5562 }
5563
5564 return arm_libcall_value_1 (mode);
5565 }
5566
5567 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5568
5569 static bool
5570 arm_function_value_regno_p (const unsigned int regno)
5571 {
5572 if (regno == ARG_REGISTER (1)
5573 || (TARGET_32BIT
5574 && TARGET_AAPCS_BASED
5575 && TARGET_HARD_FLOAT
5576 && regno == FIRST_VFP_REGNUM)
5577 || (TARGET_IWMMXT_ABI
5578 && regno == FIRST_IWMMXT_REGNUM))
5579 return true;
5580
5581 return false;
5582 }
5583
5584 /* Determine the amount of memory needed to store the possible return
5585 registers of an untyped call. */
5586 int
5587 arm_apply_result_size (void)
5588 {
5589 int size = 16;
5590
5591 if (TARGET_32BIT)
5592 {
5593 if (TARGET_HARD_FLOAT_ABI)
5594 size += 32;
5595 if (TARGET_IWMMXT_ABI)
5596 size += 8;
5597 }
5598
5599 return size;
5600 }
5601
5602 /* Decide whether TYPE should be returned in memory (true)
5603 or in a register (false). FNTYPE is the type of the function making
5604 the call. */
5605 static bool
5606 arm_return_in_memory (const_tree type, const_tree fntype)
5607 {
5608 HOST_WIDE_INT size;
5609
5610 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5611
5612 if (TARGET_AAPCS_BASED)
5613 {
5614 /* Simple, non-aggregate types (i.e. not including vectors and
5615 complex) are always returned in a register (or registers).
5616 We don't care about which register here, so we can short-cut
5617 some of the detail. */
5618 if (!AGGREGATE_TYPE_P (type)
5619 && TREE_CODE (type) != VECTOR_TYPE
5620 && TREE_CODE (type) != COMPLEX_TYPE)
5621 return false;
5622
5623 /* Any return value that is no larger than one word can be
5624 returned in r0. */
5625 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5626 return false;
5627
5628 /* Check any available co-processors to see if they accept the
5629 type as a register candidate (VFP, for example, can return
5630 some aggregates in consecutive registers). These aren't
5631 available if the call is variadic. */
5632 if (aapcs_select_return_coproc (type, fntype) >= 0)
5633 return false;
5634
5635 /* Vector values should be returned using ARM registers, not
5636 memory (unless they're over 16 bytes, which will break since
5637 we only have four call-clobbered registers to play with). */
5638 if (TREE_CODE (type) == VECTOR_TYPE)
5639 return (size < 0 || size > (4 * UNITS_PER_WORD));
5640
5641 /* The rest go in memory. */
5642 return true;
5643 }
5644
5645 if (TREE_CODE (type) == VECTOR_TYPE)
5646 return (size < 0 || size > (4 * UNITS_PER_WORD));
5647
5648 if (!AGGREGATE_TYPE_P (type)
5649 && TREE_CODE (type) != VECTOR_TYPE)
5650 /* All simple types are returned in registers. */
5651 return false;
5652
5653 if (arm_abi != ARM_ABI_APCS)
5654 {
5655 /* ATPCS and later return aggregate types in memory only if they are
5656 larger than a word (or are variable size). */
5657 return (size < 0 || size > UNITS_PER_WORD);
5658 }
5659
5660 /* For the arm-wince targets we choose to be compatible with Microsoft's
5661 ARM and Thumb compilers, which always return aggregates in memory. */
5662 #ifndef ARM_WINCE
5663 /* All structures/unions bigger than one word are returned in memory.
5664 Also catch the case where int_size_in_bytes returns -1. In this case
5665 the aggregate is either huge or of variable size, and in either case
5666 we will want to return it via memory and not in a register. */
5667 if (size < 0 || size > UNITS_PER_WORD)
5668 return true;
5669
5670 if (TREE_CODE (type) == RECORD_TYPE)
5671 {
5672 tree field;
5673
5674 /* For a struct the APCS says that we only return in a register
5675 if the type is 'integer like' and every addressable element
5676 has an offset of zero. For practical purposes this means
5677 that the structure can have at most one non bit-field element
5678 and that this element must be the first one in the structure. */
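/* Illustrative APCS examples (assuming arm_abi == ARM_ABI_APCS):
   struct { int i; } and struct { short s; int bf : 8; } are returned in
   r0, because only their first member is addressable; but
   struct { char a; char b; } goes in memory, since the second member is
   not a bit-field, and struct { float f; } goes in memory because its
   first member is a float. */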
5679
5680 /* Find the first field, ignoring non FIELD_DECL things which will
5681 have been created by C++. */
5682 for (field = TYPE_FIELDS (type);
5683 field && TREE_CODE (field) != FIELD_DECL;
5684 field = DECL_CHAIN (field))
5685 continue;
5686
5687 if (field == NULL)
5688 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5689
5690 /* Check that the first field is valid for returning in a register. */
5691
5692 /* ... Floats are not allowed */
5693 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5694 return true;
5695
5696 /* ... Aggregates that are not themselves valid for returning in
5697 a register are not allowed. */
5698 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5699 return true;
5700
5701 /* Now check the remaining fields, if any. Only bitfields are allowed,
5702 since they are not addressable. */
5703 for (field = DECL_CHAIN (field);
5704 field;
5705 field = DECL_CHAIN (field))
5706 {
5707 if (TREE_CODE (field) != FIELD_DECL)
5708 continue;
5709
5710 if (!DECL_BIT_FIELD_TYPE (field))
5711 return true;
5712 }
5713
5714 return false;
5715 }
5716
5717 if (TREE_CODE (type) == UNION_TYPE)
5718 {
5719 tree field;
5720
5721 /* Unions can be returned in registers if every element is
5722 integral, or can be returned in an integer register. */
5723 for (field = TYPE_FIELDS (type);
5724 field;
5725 field = DECL_CHAIN (field))
5726 {
5727 if (TREE_CODE (field) != FIELD_DECL)
5728 continue;
5729
5730 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5731 return true;
5732
5733 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5734 return true;
5735 }
5736
5737 return false;
5738 }
5739 #endif /* not ARM_WINCE */
5740
5741 /* Return all other types in memory. */
5742 return true;
5743 }
5744
5745 const struct pcs_attribute_arg
5746 {
5747 const char *arg;
5748 enum arm_pcs value;
5749 } pcs_attribute_args[] =
5750 {
5751 {"aapcs", ARM_PCS_AAPCS},
5752 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5753 #if 0
5754 /* We could recognize these, but changes would be needed elsewhere
5755 * to implement them. */
5756 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5757 {"atpcs", ARM_PCS_ATPCS},
5758 {"apcs", ARM_PCS_APCS},
5759 #endif
5760 {NULL, ARM_PCS_UNKNOWN}
5761 };
5762
5763 static enum arm_pcs
5764 arm_pcs_from_attribute (tree attr)
5765 {
5766 const struct pcs_attribute_arg *ptr;
5767 const char *arg;
5768
5769 /* Get the value of the argument. */
5770 if (TREE_VALUE (attr) == NULL_TREE
5771 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5772 return ARM_PCS_UNKNOWN;
5773
5774 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5775
5776 /* Check it against the list of known arguments. */
5777 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5778 if (streq (arg, ptr->arg))
5779 return ptr->value;
5780
5781 /* An unrecognized PCS variant. */
5782 return ARM_PCS_UNKNOWN;
5783 }
5784
5785 /* Get the PCS variant to use for this call. TYPE is the function's type
5786 specification, DECL is the specific declaration. DECL may be null if
5787 the call could be indirect or if this is a library call. */
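/* Illustrative use of the attribute handled here (hypothetical
   declaration):
   double f (double) __attribute__((pcs("aapcs")));
   On a hard-float target this forces F onto the base AAPCS variant, so
   its argument and return value travel in core registers rather than in
   VFP registers. */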
5788 static enum arm_pcs
5789 arm_get_pcs_model (const_tree type, const_tree decl)
5790 {
5791 bool user_convention = false;
5792 enum arm_pcs user_pcs = arm_pcs_default;
5793 tree attr;
5794
5795 gcc_assert (type);
5796
5797 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5798 if (attr)
5799 {
5800 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5801 user_convention = true;
5802 }
5803
5804 if (TARGET_AAPCS_BASED)
5805 {
5806 /* Detect varargs functions. These always use the base rules
5807 (no argument is ever a candidate for a co-processor
5808 register). */
5809 bool base_rules = stdarg_p (type);
5810
5811 if (user_convention)
5812 {
5813 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5814 sorry ("non-AAPCS derived PCS variant");
5815 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5816 error ("variadic functions must use the base AAPCS variant");
5817 }
5818
5819 if (base_rules)
5820 return ARM_PCS_AAPCS;
5821 else if (user_convention)
5822 return user_pcs;
5823 else if (decl && flag_unit_at_a_time)
5824 {
5825 /* Local functions never leak outside this compilation unit,
5826 so we are free to use whatever conventions are
5827 appropriate. */
5828 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5829 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5830 if (i && i->local)
5831 return ARM_PCS_AAPCS_LOCAL;
5832 }
5833 }
5834 else if (user_convention && user_pcs != arm_pcs_default)
5835 sorry ("PCS variant");
5836
5837 /* For everything else we use the target's default. */
5838 return arm_pcs_default;
5839 }
5840
5841
5842 static void
5843 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5844 const_tree fntype ATTRIBUTE_UNUSED,
5845 rtx libcall ATTRIBUTE_UNUSED,
5846 const_tree fndecl ATTRIBUTE_UNUSED)
5847 {
5848 /* Record the unallocated VFP registers. */
5849 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5850 pcum->aapcs_vfp_reg_alloc = 0;
5851 }
5852
5853 /* Walk down the type tree of TYPE counting consecutive base elements.
5854 If *MODEP is VOIDmode, then set it to the first valid floating point
5855 type. If a non-floating point type is found, or if a floating point
5856 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5857 otherwise return the count in the sub-tree. */
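/* Illustrative results: struct { float x, y, z; } sets *MODEP to SFmode
   and returns 3 (a homogeneous aggregate of three floats); _Complex double
   sets *MODEP to DFmode and returns 2; struct { float f; double d; } mixes
   base types and returns -1. */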
5858 static int
5859 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5860 {
5861 machine_mode mode;
5862 HOST_WIDE_INT size;
5863
5864 switch (TREE_CODE (type))
5865 {
5866 case REAL_TYPE:
5867 mode = TYPE_MODE (type);
5868 if (mode != DFmode && mode != SFmode && mode != HFmode)
5869 return -1;
5870
5871 if (*modep == VOIDmode)
5872 *modep = mode;
5873
5874 if (*modep == mode)
5875 return 1;
5876
5877 break;
5878
5879 case COMPLEX_TYPE:
5880 mode = TYPE_MODE (TREE_TYPE (type));
5881 if (mode != DFmode && mode != SFmode)
5882 return -1;
5883
5884 if (*modep == VOIDmode)
5885 *modep = mode;
5886
5887 if (*modep == mode)
5888 return 2;
5889
5890 break;
5891
5892 case VECTOR_TYPE:
5893 /* Use V2SImode and V4SImode as representatives of all 64-bit
5894 and 128-bit vector types, whether or not those modes are
5895 supported with the present options. */
5896 size = int_size_in_bytes (type);
5897 switch (size)
5898 {
5899 case 8:
5900 mode = V2SImode;
5901 break;
5902 case 16:
5903 mode = V4SImode;
5904 break;
5905 default:
5906 return -1;
5907 }
5908
5909 if (*modep == VOIDmode)
5910 *modep = mode;
5911
5912 /* Vector modes are considered to be opaque: two vectors are
5913 equivalent for the purposes of being homogeneous aggregates
5914 if they are the same size. */
5915 if (*modep == mode)
5916 return 1;
5917
5918 break;
5919
5920 case ARRAY_TYPE:
5921 {
5922 int count;
5923 tree index = TYPE_DOMAIN (type);
5924
5925 /* Can't handle incomplete types nor sizes that are not
5926 fixed. */
5927 if (!COMPLETE_TYPE_P (type)
5928 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5929 return -1;
5930
5931 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5932 if (count == -1
5933 || !index
5934 || !TYPE_MAX_VALUE (index)
5935 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5936 || !TYPE_MIN_VALUE (index)
5937 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5938 || count < 0)
5939 return -1;
5940
5941 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5942 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5943
5944 /* There must be no padding. */
5945 if (wi::to_wide (TYPE_SIZE (type))
5946 != count * GET_MODE_BITSIZE (*modep))
5947 return -1;
5948
5949 return count;
5950 }
5951
5952 case RECORD_TYPE:
5953 {
5954 int count = 0;
5955 int sub_count;
5956 tree field;
5957
5958 /* Can't handle incomplete types nor sizes that are not
5959 fixed. */
5960 if (!COMPLETE_TYPE_P (type)
5961 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5962 return -1;
5963
5964 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5965 {
5966 if (TREE_CODE (field) != FIELD_DECL)
5967 continue;
5968
5969 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5970 if (sub_count < 0)
5971 return -1;
5972 count += sub_count;
5973 }
5974
5975 /* There must be no padding. */
5976 if (wi::to_wide (TYPE_SIZE (type))
5977 != count * GET_MODE_BITSIZE (*modep))
5978 return -1;
5979
5980 return count;
5981 }
5982
5983 case UNION_TYPE:
5984 case QUAL_UNION_TYPE:
5985 {
5986 /* These aren't very interesting except in a degenerate case. */
5987 int count = 0;
5988 int sub_count;
5989 tree field;
5990
5991 /* Can't handle incomplete types nor sizes that are not
5992 fixed. */
5993 if (!COMPLETE_TYPE_P (type)
5994 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5995 return -1;
5996
5997 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5998 {
5999 if (TREE_CODE (field) != FIELD_DECL)
6000 continue;
6001
6002 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6003 if (sub_count < 0)
6004 return -1;
6005 count = count > sub_count ? count : sub_count;
6006 }
6007
6008 /* There must be no padding. */
6009 if (wi::to_wide (TYPE_SIZE (type))
6010 != count * GET_MODE_BITSIZE (*modep))
6011 return -1;
6012
6013 return count;
6014 }
6015
6016 default:
6017 break;
6018 }
6019
6020 return -1;
6021 }
6022
6023 /* Return true if PCS_VARIANT should use VFP registers. */
6024 static bool
6025 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6026 {
6027 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6028 {
6029 static bool seen_thumb1_vfp = false;
6030
6031 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6032 {
6033 sorry ("Thumb-1 hard-float VFP ABI");
6034 /* sorry() is not immediately fatal, so only display this once. */
6035 seen_thumb1_vfp = true;
6036 }
6037
6038 return true;
6039 }
6040
6041 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6042 return false;
6043
6044 return (TARGET_32BIT && TARGET_HARD_FLOAT
6045 && (TARGET_VFP_DOUBLE || !is_double));
6046 }
6047
6048 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6049 suitable for passing or returning in VFP registers for the PCS
6050 variant selected. If it is, then *BASE_MODE is updated to contain
6051 a machine mode describing each element of the argument's type and
6052 *COUNT to hold the number of such elements. */
6053 static bool
6054 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6055 machine_mode mode, const_tree type,
6056 machine_mode *base_mode, int *count)
6057 {
6058 machine_mode new_mode = VOIDmode;
6059
6060 /* If we have the type information, prefer that to working things
6061 out from the mode. */
6062 if (type)
6063 {
6064 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6065
6066 if (ag_count > 0 && ag_count <= 4)
6067 *count = ag_count;
6068 else
6069 return false;
6070 }
6071 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6072 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6073 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6074 {
6075 *count = 1;
6076 new_mode = mode;
6077 }
6078 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6079 {
6080 *count = 2;
6081 new_mode = (mode == DCmode ? DFmode : SFmode);
6082 }
6083 else
6084 return false;
6085
6086
6087 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6088 return false;
6089
6090 *base_mode = new_mode;
6091 return true;
6092 }
6093
6094 static bool
6095 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6096 machine_mode mode, const_tree type)
6097 {
6098 int count ATTRIBUTE_UNUSED;
6099 machine_mode ag_mode ATTRIBUTE_UNUSED;
6100
6101 if (!use_vfp_abi (pcs_variant, false))
6102 return false;
6103 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6104 &ag_mode, &count);
6105 }
6106
6107 static bool
6108 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6109 const_tree type)
6110 {
6111 if (!use_vfp_abi (pcum->pcs_variant, false))
6112 return false;
6113
6114 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6115 &pcum->aapcs_vfp_rmode,
6116 &pcum->aapcs_vfp_rcount);
6117 }
6118
6119 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6120 for the behaviour of this function. */
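/* Illustrative allocation: a DFmode argument gives shift == 2 and
   mask == 0x3, so the loop below scans s0, s2, s4, ... for an even-aligned
   pair of free single-precision registers (i.e. a free D register); a
   homogeneous aggregate of three floats gives shift == 1 and mask == 0x7,
   requiring three consecutive free S registers. */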
6121
6122 static bool
6123 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6124 const_tree type ATTRIBUTE_UNUSED)
6125 {
6126 int rmode_size
6127 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6128 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6129 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6130 int regno;
6131
6132 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6133 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6134 {
6135 pcum->aapcs_vfp_reg_alloc = mask << regno;
6136 if (mode == BLKmode
6137 || (mode == TImode && ! TARGET_NEON)
6138 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6139 {
6140 int i;
6141 int rcount = pcum->aapcs_vfp_rcount;
6142 int rshift = shift;
6143 machine_mode rmode = pcum->aapcs_vfp_rmode;
6144 rtx par;
6145 if (!TARGET_NEON)
6146 {
6147 /* Avoid using unsupported vector modes. */
6148 if (rmode == V2SImode)
6149 rmode = DImode;
6150 else if (rmode == V4SImode)
6151 {
6152 rmode = DImode;
6153 rcount *= 2;
6154 rshift /= 2;
6155 }
6156 }
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6158 for (i = 0; i < rcount; i++)
6159 {
6160 rtx tmp = gen_rtx_REG (rmode,
6161 FIRST_VFP_REGNUM + regno + i * rshift);
6162 tmp = gen_rtx_EXPR_LIST
6163 (VOIDmode, tmp,
6164 GEN_INT (i * GET_MODE_SIZE (rmode)));
6165 XVECEXP (par, 0, i) = tmp;
6166 }
6167
6168 pcum->aapcs_reg = par;
6169 }
6170 else
6171 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6172 return true;
6173 }
6174 return false;
6175 }
6176
6177 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6178 comment there for the behaviour of this function. */
6179
6180 static rtx
6181 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6182 machine_mode mode,
6183 const_tree type ATTRIBUTE_UNUSED)
6184 {
6185 if (!use_vfp_abi (pcs_variant, false))
6186 return NULL;
6187
6188 if (mode == BLKmode
6189 || (GET_MODE_CLASS (mode) == MODE_INT
6190 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6191 && !TARGET_NEON))
6192 {
6193 int count;
6194 machine_mode ag_mode;
6195 int i;
6196 rtx par;
6197 int shift;
6198
6199 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6200 &ag_mode, &count);
6201
6202 if (!TARGET_NEON)
6203 {
6204 if (ag_mode == V2SImode)
6205 ag_mode = DImode;
6206 else if (ag_mode == V4SImode)
6207 {
6208 ag_mode = DImode;
6209 count *= 2;
6210 }
6211 }
6212 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6213 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6214 for (i = 0; i < count; i++)
6215 {
6216 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6217 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6218 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6219 XVECEXP (par, 0, i) = tmp;
6220 }
6221
6222 return par;
6223 }
6224
6225 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6226 }
6227
6228 static void
6229 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6230 machine_mode mode ATTRIBUTE_UNUSED,
6231 const_tree type ATTRIBUTE_UNUSED)
6232 {
6233 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6234 pcum->aapcs_vfp_reg_alloc = 0;
6235 return;
6236 }
6237
6238 #define AAPCS_CP(X) \
6239 { \
6240 aapcs_ ## X ## _cum_init, \
6241 aapcs_ ## X ## _is_call_candidate, \
6242 aapcs_ ## X ## _allocate, \
6243 aapcs_ ## X ## _is_return_candidate, \
6244 aapcs_ ## X ## _allocate_return_reg, \
6245 aapcs_ ## X ## _advance \
6246 }
6247
6248 /* Table of co-processors that can be used to pass arguments in
6249 registers. Ideally no argument should be a candidate for more than
6250 one co-processor table entry, but the table is processed in order
6251 and stops after the first match. If that entry then fails to put
6252 the argument into a co-processor register, the argument will go on
6253 the stack. */
6254 static struct
6255 {
6256 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6257 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6258
6259 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6260 BLKmode) is a candidate for this co-processor's registers; this
6261 function should ignore any position-dependent state in
6262 CUMULATIVE_ARGS and only use call-type dependent information. */
6263 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6264
6265 /* Return true if the argument does get a co-processor register; it
6266 should set aapcs_reg to an RTX of the register allocated as is
6267 required for a return from FUNCTION_ARG. */
6268 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6269
6270 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6271 be returned in this co-processor's registers. */
6272 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6273
6274 /* Allocate and return an RTX element to hold the return type of a call. This
6275 routine must not fail and will only be called if is_return_candidate
6276 returned true with the same parameters. */
6277 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6278
6279 /* Finish processing this argument and prepare to start processing
6280 the next one. */
6281 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6283 {
6284 AAPCS_CP(vfp)
6285 };
6286
6287 #undef AAPCS_CP
6288
6289 static int
6290 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6291 const_tree type)
6292 {
6293 int i;
6294
6295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6296 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6297 return i;
6298
6299 return -1;
6300 }
6301
6302 static int
6303 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6304 {
6305 /* We aren't passed a decl, so we can't check that a call is local.
6306 However, it isn't clear that that would be a win anyway, since it
6307 might limit some tail-calling opportunities. */
6308 enum arm_pcs pcs_variant;
6309
6310 if (fntype)
6311 {
6312 const_tree fndecl = NULL_TREE;
6313
6314 if (TREE_CODE (fntype) == FUNCTION_DECL)
6315 {
6316 fndecl = fntype;
6317 fntype = TREE_TYPE (fntype);
6318 }
6319
6320 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6321 }
6322 else
6323 pcs_variant = arm_pcs_default;
6324
6325 if (pcs_variant != ARM_PCS_AAPCS)
6326 {
6327 int i;
6328
6329 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6330 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6331 TYPE_MODE (type),
6332 type))
6333 return i;
6334 }
6335 return -1;
6336 }
6337
6338 static rtx
6339 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6340 const_tree fntype)
6341 {
6342 /* We aren't passed a decl, so we can't check that a call is local.
6343 However, it isn't clear that that would be a win anyway, since it
6344 might limit some tail-calling opportunities. */
6345 enum arm_pcs pcs_variant;
6346 int unsignedp ATTRIBUTE_UNUSED;
6347
6348 if (fntype)
6349 {
6350 const_tree fndecl = NULL_TREE;
6351
6352 if (TREE_CODE (fntype) == FUNCTION_DECL)
6353 {
6354 fndecl = fntype;
6355 fntype = TREE_TYPE (fntype);
6356 }
6357
6358 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6359 }
6360 else
6361 pcs_variant = arm_pcs_default;
6362
6363 /* Promote integer types. */
6364 if (type && INTEGRAL_TYPE_P (type))
6365 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6366
6367 if (pcs_variant != ARM_PCS_AAPCS)
6368 {
6369 int i;
6370
6371 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6372 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6373 type))
6374 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6375 mode, type);
6376 }
6377
6378 /* Promotes small structs returned in a register to full-word size
6379 for big-endian AAPCS. */
6380 if (type && arm_return_in_msb (type))
6381 {
6382 HOST_WIDE_INT size = int_size_in_bytes (type);
6383 if (size % UNITS_PER_WORD != 0)
6384 {
6385 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6386 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6387 }
6388 }
6389
6390 return gen_rtx_REG (mode, R0_REGNUM);
6391 }
6392
6393 static rtx
6394 aapcs_libcall_value (machine_mode mode)
6395 {
6396 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6397 && GET_MODE_SIZE (mode) <= 4)
6398 mode = SImode;
6399
6400 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6401 }
6402
6403 /* Lay out a function argument using the AAPCS rules. The rule
6404 numbers referred to here are those in the AAPCS. */
6405 static void
6406 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6407 const_tree type, bool named)
6408 {
6409 int nregs, nregs2;
6410 int ncrn;
6411
6412 /* We only need to do this once per argument. */
6413 if (pcum->aapcs_arg_processed)
6414 return;
6415
6416 pcum->aapcs_arg_processed = true;
6417
6418 /* Special case: if named is false then we are handling an incoming
6419 anonymous argument which is on the stack. */
6420 if (!named)
6421 return;
6422
6423 /* Is this a potential co-processor register candidate? */
6424 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6425 {
6426 int slot = aapcs_select_call_coproc (pcum, mode, type);
6427 pcum->aapcs_cprc_slot = slot;
6428
6429 /* We don't have to apply any of the rules from part B of the
6430 preparation phase, these are handled elsewhere in the
6431 compiler. */
6432
6433 if (slot >= 0)
6434 {
6435 /* A Co-processor register candidate goes either in its own
6436 class of registers or on the stack. */
6437 if (!pcum->aapcs_cprc_failed[slot])
6438 {
6439 /* C1.cp - Try to allocate the argument to co-processor
6440 registers. */
6441 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6442 return;
6443
6444 /* C2.cp - Put the argument on the stack and note that we
6445 can't assign any more candidates in this slot. We also
6446 need to note that we have allocated stack space, so that
6447 we won't later try to split a non-cprc candidate between
6448 core registers and the stack. */
6449 pcum->aapcs_cprc_failed[slot] = true;
6450 pcum->can_split = false;
6451 }
6452
6453 /* We didn't get a register, so this argument goes on the
6454 stack. */
6455 gcc_assert (pcum->can_split == false);
6456 return;
6457 }
6458 }
6459
6460 /* C3 - For double-word aligned arguments, round the NCRN up to the
6461 next even number. */
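/* Illustrative example: for f (int a, long long b), A takes r0 and leaves
   NCRN == 1; B needs doubleword alignment, so NCRN is rounded up to 2 and
   B occupies r2 and r3. */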
6462 ncrn = pcum->aapcs_ncrn;
6463 if (ncrn & 1)
6464 {
6465 int res = arm_needs_doubleword_align (mode, type);
6466 /* Only warn during RTL expansion of call stmts, otherwise we would
6467 warn e.g. during gimplification even on functions that will be
6468 always inlined, and we'd warn multiple times. Don't warn when
6469 called in expand_function_start either, as we warn instead in
6470 arm_function_arg_boundary in that case. */
6471 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6472 inform (input_location, "parameter passing for argument of type "
6473 "%qT changed in GCC 7.1", type);
6474 else if (res > 0)
6475 ncrn++;
6476 }
6477
6478 nregs = ARM_NUM_REGS2(mode, type);
6479
6480 /* Sigh, this test should really assert that nregs > 0, but a GCC
6481 extension allows empty structs and then gives them empty size; it
6482 then allows such a structure to be passed by value. For some of
6483 the code below we have to pretend that such an argument has
6484 non-zero size so that we 'locate' it correctly either in
6485 registers or on the stack. */
6486 gcc_assert (nregs >= 0);
6487
6488 nregs2 = nregs ? nregs : 1;
6489
6490 /* C4 - Argument fits entirely in core registers. */
6491 if (ncrn + nregs2 <= NUM_ARG_REGS)
6492 {
6493 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6494 pcum->aapcs_next_ncrn = ncrn + nregs;
6495 return;
6496 }
6497
6498 /* C5 - Some core registers left and there are no arguments already
6499 on the stack: split this argument between the remaining core
6500 registers and the stack. */
6501 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6502 {
6503 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6504 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6505 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6506 return;
6507 }
6508
6509 /* C6 - NCRN is set to 4. */
6510 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6511
6512 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6513 return;
6514 }
6515
6516 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6517 for a call to a function whose data type is FNTYPE.
6518 For a library call, FNTYPE is NULL. */
6519 void
6520 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6521 rtx libname,
6522 tree fndecl ATTRIBUTE_UNUSED)
6523 {
6524 /* Determine the calling convention for this call. */
6525 if (fntype)
6526 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6527 else
6528 pcum->pcs_variant = arm_pcs_default;
6529
6530 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6531 {
6532 if (arm_libcall_uses_aapcs_base (libname))
6533 pcum->pcs_variant = ARM_PCS_AAPCS;
6534
6535 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6536 pcum->aapcs_reg = NULL_RTX;
6537 pcum->aapcs_partial = 0;
6538 pcum->aapcs_arg_processed = false;
6539 pcum->aapcs_cprc_slot = -1;
6540 pcum->can_split = true;
6541
6542 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6543 {
6544 int i;
6545
6546 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6547 {
6548 pcum->aapcs_cprc_failed[i] = false;
6549 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6550 }
6551 }
6552 return;
6553 }
6554
6555 /* Legacy ABIs */
6556
6557 /* On the ARM, the offset starts at 0. */
6558 pcum->nregs = 0;
6559 pcum->iwmmxt_nregs = 0;
6560 pcum->can_split = true;
6561
6562 /* Varargs vectors are treated the same as long long.
6563 named_count avoids having to change the way arm handles 'named'. */
6564 pcum->named_count = 0;
6565 pcum->nargs = 0;
6566
6567 if (TARGET_REALLY_IWMMXT && fntype)
6568 {
6569 tree fn_arg;
6570
6571 for (fn_arg = TYPE_ARG_TYPES (fntype);
6572 fn_arg;
6573 fn_arg = TREE_CHAIN (fn_arg))
6574 pcum->named_count += 1;
6575
6576 if (! pcum->named_count)
6577 pcum->named_count = INT_MAX;
6578 }
6579 }
6580
6581 /* Return 1 if double word alignment is required for argument passing.
6582 Return -1 if double word alignment used to be required for argument
6583 passing before PR77728 ABI fix, but is not required anymore.
6584 Return 0 if double word alignment is not required and wasn't required
6585 before either. */
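/* Illustrative results: DImode and DFmode arguments return 1, since their
   64-bit alignment exceeds the 32-bit PARM_BOUNDARY; plain int returns 0;
   a struct with a long long FIELD_DECL returns 1, while one whose members
   all have word alignment or less returns 0. */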
6586 static int
6587 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6588 {
6589 if (!type)
6590 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6591
6592 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6593 if (!AGGREGATE_TYPE_P (type))
6594 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6595
6596 /* Array types: Use member alignment of element type. */
6597 if (TREE_CODE (type) == ARRAY_TYPE)
6598 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6599
6600 int ret = 0;
6601 /* Record/aggregate types: Use greatest member alignment of any member. */
6602 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6603 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6604 {
6605 if (TREE_CODE (field) == FIELD_DECL)
6606 return 1;
6607 else
6608 /* Before PR77728 fix, we were incorrectly considering also
6609 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6610 Make sure we can warn about that with -Wpsabi. */
6611 ret = -1;
6612 }
6613
6614 return ret;
6615 }
6616
6617
6618 /* Determine where to put an argument to a function.
6619 Value is zero to push the argument on the stack,
6620 or a hard register in which to store the argument.
6621
6622 MODE is the argument's machine mode.
6623 TYPE is the data type of the argument (as a tree).
6624 This is null for libcalls where that information may
6625 not be available.
6626 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6627 the preceding args and about the function being called.
6628 NAMED is nonzero if this argument is a named parameter
6629 (otherwise it is an extra parameter matching an ellipsis).
6630
6631 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6632 other arguments are passed on the stack. If (NAMED == 0) (which happens
6633 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6634 defined), say it is passed on the stack (function_prologue will
6635 arrange for it to be passed on the stack if necessary). */
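/* Illustrative example: for f (int a, int b, int c, int d, int e) the
   first four arguments are passed in r0-r3 and E is passed on the
   stack. */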
6636
6637 static rtx
6638 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6639 const_tree type, bool named)
6640 {
6641 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6642 int nregs;
6643
6644 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6645 a call insn (op3 of a call_value insn). */
6646 if (mode == VOIDmode)
6647 return const0_rtx;
6648
6649 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6650 {
6651 aapcs_layout_arg (pcum, mode, type, named);
6652 return pcum->aapcs_reg;
6653 }
6654
6655 /* Varargs vectors are treated the same as long long.
6656 named_count avoids having to change the way arm handles 'named'. */
6657 if (TARGET_IWMMXT_ABI
6658 && arm_vector_mode_supported_p (mode)
6659 && pcum->named_count > pcum->nargs + 1)
6660 {
6661 if (pcum->iwmmxt_nregs <= 9)
6662 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6663 else
6664 {
6665 pcum->can_split = false;
6666 return NULL_RTX;
6667 }
6668 }
6669
6670 /* Put doubleword aligned quantities in even register pairs. */
6671 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6672 {
6673 int res = arm_needs_doubleword_align (mode, type);
6674 if (res < 0 && warn_psabi)
6675 inform (input_location, "parameter passing for argument of type "
6676 "%qT changed in GCC 7.1", type);
6677 else if (res > 0)
6678 pcum->nregs++;
6679 }
6680
6681 /* Only allow splitting an arg between regs and memory if all preceding
6682 args were allocated to regs. For args passed by reference we only count
6683 the reference pointer. */
6684 if (pcum->can_split)
6685 nregs = 1;
6686 else
6687 nregs = ARM_NUM_REGS2 (mode, type);
6688
6689 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6690 return NULL_RTX;
6691
6692 return gen_rtx_REG (mode, pcum->nregs);
6693 }
6694
6695 static unsigned int
6696 arm_function_arg_boundary (machine_mode mode, const_tree type)
6697 {
6698 if (!ARM_DOUBLEWORD_ALIGN)
6699 return PARM_BOUNDARY;
6700
6701 int res = arm_needs_doubleword_align (mode, type);
6702 if (res < 0 && warn_psabi)
6703 inform (input_location, "parameter passing for argument of type %qT "
6704 "changed in GCC 7.1", type);
6705
6706 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6707 }
6708
6709 static int
6710 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6711 tree type, bool named)
6712 {
6713 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6714 int nregs = pcum->nregs;
6715
6716 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6717 {
6718 aapcs_layout_arg (pcum, mode, type, named);
6719 return pcum->aapcs_partial;
6720 }
6721
6722 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6723 return 0;
6724
6725 if (NUM_ARG_REGS > nregs
6726 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6727 && pcum->can_split)
6728 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6729
6730 return 0;
6731 }
6732
6733 /* Update the data in PCUM to advance over an argument
6734 of mode MODE and data type TYPE.
6735 (TYPE is null for libcalls where that information may not be available.) */
6736
6737 static void
6738 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6739 const_tree type, bool named)
6740 {
6741 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6742
6743 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6744 {
6745 aapcs_layout_arg (pcum, mode, type, named);
6746
6747 if (pcum->aapcs_cprc_slot >= 0)
6748 {
6749 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6750 type);
6751 pcum->aapcs_cprc_slot = -1;
6752 }
6753
6754 /* Generic stuff. */
6755 pcum->aapcs_arg_processed = false;
6756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6757 pcum->aapcs_reg = NULL_RTX;
6758 pcum->aapcs_partial = 0;
6759 }
6760 else
6761 {
6762 pcum->nargs += 1;
6763 if (arm_vector_mode_supported_p (mode)
6764 && pcum->named_count > pcum->nargs
6765 && TARGET_IWMMXT_ABI)
6766 pcum->iwmmxt_nregs += 1;
6767 else
6768 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6769 }
6770 }
6771
6772 /* Variable sized types are passed by reference. This is a GCC
6773 extension to the ARM ABI. */
6774
6775 static bool
6776 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6777 machine_mode mode ATTRIBUTE_UNUSED,
6778 const_tree type, bool named ATTRIBUTE_UNUSED)
6779 {
6780 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6781 }
6782 \f
6783 /* Encode the current state of the #pragma [no_]long_calls. */
6784 typedef enum
6785 {
6786 OFF, /* No #pragma [no_]long_calls is in effect. */
6787 LONG, /* #pragma long_calls is in effect. */
6788 SHORT /* #pragma no_long_calls is in effect. */
6789 } arm_pragma_enum;
6790
6791 static arm_pragma_enum arm_pragma_long_calls = OFF;
6792
6793 void
6794 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6795 {
6796 arm_pragma_long_calls = LONG;
6797 }
6798
6799 void
6800 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6801 {
6802 arm_pragma_long_calls = SHORT;
6803 }
6804
6805 void
6806 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6807 {
6808 arm_pragma_long_calls = OFF;
6809 }
6810 \f
6811 /* Handle an attribute requiring a FUNCTION_DECL;
6812 arguments as in struct attribute_spec.handler. */
6813 static tree
6814 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6815 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6816 {
6817 if (TREE_CODE (*node) != FUNCTION_DECL)
6818 {
6819 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6820 name);
6821 *no_add_attrs = true;
6822 }
6823
6824 return NULL_TREE;
6825 }
6826
6827 /* Handle an "interrupt" or "isr" attribute;
6828 arguments as in struct attribute_spec.handler. */
6829 static tree
6830 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6831 bool *no_add_attrs)
6832 {
6833 if (DECL_P (*node))
6834 {
6835 if (TREE_CODE (*node) != FUNCTION_DECL)
6836 {
6837 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6838 name);
6839 *no_add_attrs = true;
6840 }
6841 /* FIXME: the argument, if any, is checked for type attributes;
6842 should it be checked for decl ones? */
6843 }
6844 else
6845 {
6846 if (TREE_CODE (*node) == FUNCTION_TYPE
6847 || TREE_CODE (*node) == METHOD_TYPE)
6848 {
6849 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6850 {
6851 warning (OPT_Wattributes, "%qE attribute ignored",
6852 name);
6853 *no_add_attrs = true;
6854 }
6855 }
6856 else if (TREE_CODE (*node) == POINTER_TYPE
6857 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6858 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6859 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6860 {
6861 *node = build_variant_type_copy (*node);
6862 TREE_TYPE (*node) = build_type_attribute_variant
6863 (TREE_TYPE (*node),
6864 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6865 *no_add_attrs = true;
6866 }
6867 else
6868 {
6869 /* Possibly pass this attribute on from the type to a decl. */
6870 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6871 | (int) ATTR_FLAG_FUNCTION_NEXT
6872 | (int) ATTR_FLAG_ARRAY_NEXT))
6873 {
6874 *no_add_attrs = true;
6875 return tree_cons (name, args, NULL_TREE);
6876 }
6877 else
6878 {
6879 warning (OPT_Wattributes, "%qE attribute ignored",
6880 name);
6881 }
6882 }
6883 }
6884
6885 return NULL_TREE;
6886 }
6887
6888 /* Handle a "pcs" attribute; arguments as in struct
6889 attribute_spec.handler. */
6890 static tree
6891 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6892 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6893 {
6894 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6895 {
6896 warning (OPT_Wattributes, "%qE attribute ignored", name);
6897 *no_add_attrs = true;
6898 }
6899 return NULL_TREE;
6900 }
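/* Illustrative only: the "pcs" attribute selects the procedure call
   standard for a function type, e.g.

       void (*fp) (int) __attribute__ ((pcs ("aapcs")));

   arm_pcs_from_attribute recognises the documented variant strings
   ("aapcs" and "aapcs-vfp"); anything else is rejected with the
   warning above.  */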
6901
6902 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6903 /* Handle the "notshared" attribute. This attribute is another way of
6904 requesting hidden visibility. ARM's compiler supports
6905 "__declspec(notshared)"; we support the same thing via an
6906 attribute. */
6907
6908 static tree
6909 arm_handle_notshared_attribute (tree *node,
6910 tree name ATTRIBUTE_UNUSED,
6911 tree args ATTRIBUTE_UNUSED,
6912 int flags ATTRIBUTE_UNUSED,
6913 bool *no_add_attrs)
6914 {
6915 tree decl = TYPE_NAME (*node);
6916
6917 if (decl)
6918 {
6919 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6920 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6921 *no_add_attrs = false;
6922 }
6923 return NULL_TREE;
6924 }
6925 #endif
6926
6927 /* This function returns true if a function with declaration FNDECL and type
6928 FNTYPE uses the stack to pass arguments or to return its value, and false
6929 otherwise. This is used for functions with the attributes
6930 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6931 diagnostic messages if the stack is used. NAME is the name of the attribute
6932 used. */
6933
6934 static bool
6935 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6936 {
6937 function_args_iterator args_iter;
6938 CUMULATIVE_ARGS args_so_far_v;
6939 cumulative_args_t args_so_far;
6940 bool first_param = true;
6941 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6942
6943 /* Error out if any argument is passed on the stack. */
6944 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6945 args_so_far = pack_cumulative_args (&args_so_far_v);
6946 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6947 {
6948 rtx arg_rtx;
6949 machine_mode arg_mode = TYPE_MODE (arg_type);
6950
6951 prev_arg_type = arg_type;
6952 if (VOID_TYPE_P (arg_type))
6953 continue;
6954
6955 if (!first_param)
6956 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6957 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6958 if (!arg_rtx
6959 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6960 {
6961 error ("%qE attribute not available to functions with arguments "
6962 "passed on the stack", name);
6963 return true;
6964 }
6965 first_param = false;
6966 }
6967
6968 /* Error out for variadic functions since we cannot control how many
6969 arguments will be passed and thus the stack could be used. stdarg_p () is
6970 not used for this check to avoid walking the argument list twice. */
6971 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6972 {
6973 error ("%qE attribute not available to functions with variable number "
6974 "of arguments", name);
6975 return true;
6976 }
6977
6978 /* Error out if return value is passed on the stack. */
6979 ret_type = TREE_TYPE (fntype);
6980 if (arm_return_in_memory (ret_type, fntype))
6981 {
6982 error ("%qE attribute not available to functions that return value on "
6983 "the stack", name);
6984 return true;
6985 }
6986 return false;
6987 }
6988
6989 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6990 function will check whether the attribute is allowed here and will add the
6991 attribute to the function declaration tree or otherwise issue a warning. */
6992
6993 static tree
6994 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6995 tree /* args */,
6996 int /* flags */,
6997 bool *no_add_attrs)
6998 {
6999 tree fndecl;
7000
7001 if (!use_cmse)
7002 {
7003 *no_add_attrs = true;
7004 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7005 name);
7006 return NULL_TREE;
7007 }
7008
7009 /* Ignore attribute for function types. */
7010 if (TREE_CODE (*node) != FUNCTION_DECL)
7011 {
7012 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7013 name);
7014 *no_add_attrs = true;
7015 return NULL_TREE;
7016 }
7017
7018 fndecl = *node;
7019
7020 /* Warn for static linkage functions. */
7021 if (!TREE_PUBLIC (fndecl))
7022 {
7023 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7024 "with static linkage", name);
7025 *no_add_attrs = true;
7026 return NULL_TREE;
7027 }
7028
7029 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7030 TREE_TYPE (fndecl));
7031 return NULL_TREE;
7032 }
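/* Illustrative only: with -mcmse, a secure-world entry point visible to
   non-secure code might look like

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   All arguments and the return value must fit in registers; otherwise
   cmse_func_args_or_return_in_stack rejects the attribute above.  */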
7033
7034
7035 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7036 function will check whether the attribute is allowed here and will add the
7037 attribute to the function type tree or otherwise issue a diagnostic. The
7038 reason we check this at declaration time is to only allow the use of the
7039 attribute with declarations of function pointers and not function
7040 declarations. This function checks NODE is of the expected type and issues
7041 diagnostics otherwise using NAME. If it is not of the expected type
7042 *NO_ADD_ATTRS will be set to true. */
7043
7044 static tree
7045 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7046 tree /* args */,
7047 int /* flags */,
7048 bool *no_add_attrs)
7049 {
7050 tree decl = NULL_TREE, fntype = NULL_TREE;
7051 tree type;
7052
7053 if (!use_cmse)
7054 {
7055 *no_add_attrs = true;
7056 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7057 name);
7058 return NULL_TREE;
7059 }
7060
7061 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7062 {
7063 decl = *node;
7064 fntype = TREE_TYPE (decl);
7065 }
7066
7067 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7068 fntype = TREE_TYPE (fntype);
7069
7070 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7071 {
7072 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7073 "function pointer", name);
7074 *no_add_attrs = true;
7075 return NULL_TREE;
7076 }
7077
7078 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7079
7080 if (*no_add_attrs)
7081 return NULL_TREE;
7082
7083 /* Prevent trees being shared among function types with and without
7084 cmse_nonsecure_call attribute. */
7085 type = TREE_TYPE (decl);
7086
7087 type = build_distinct_type_copy (type);
7088 TREE_TYPE (decl) = type;
7089 fntype = type;
7090
7091 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7092 {
7093 type = fntype;
7094 fntype = TREE_TYPE (fntype);
7095 fntype = build_distinct_type_copy (fntype);
7096 TREE_TYPE (type) = fntype;
7097 }
7098
7099 /* Construct a type attribute and add it to the function type. */
7100 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7101 TYPE_ATTRIBUTES (fntype));
7102 TYPE_ATTRIBUTES (fntype) = attrs;
7103 return NULL_TREE;
7104 }
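/* Illustrative only: the attribute applies to the function type behind a
   pointer, typically written as

       void __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);

   Calls through ns_callback are then emitted as non-secure calls.  The
   distinct type copies made above keep this marker from leaking onto
   unrelated function types that merely share tree nodes.  */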
7105
7106 /* Return 0 if the attributes for two types are incompatible, 1 if they
7107 are compatible, and 2 if they are nearly compatible (which causes a
7108 warning to be generated). */
7109 static int
7110 arm_comp_type_attributes (const_tree type1, const_tree type2)
7111 {
7112 int l1, l2, s1, s2;
7113
7114 /* Check for mismatch of non-default calling convention. */
7115 if (TREE_CODE (type1) != FUNCTION_TYPE)
7116 return 1;
7117
7118 /* Check for mismatched call attributes. */
7119 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7120 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7121 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7122 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7123
7124 /* Only bother to check if an attribute is defined. */
7125 if (l1 | l2 | s1 | s2)
7126 {
7127 /* If one type has an attribute, the other must have the same attribute. */
7128 if ((l1 != l2) || (s1 != s2))
7129 return 0;
7130
7131 /* Disallow mixed attributes. */
7132 if ((l1 & s2) || (l2 & s1))
7133 return 0;
7134 }
7135
7136 /* Check for mismatched ISR attribute. */
7137 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7138 if (! l1)
7139 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7140 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7141 if (! l2)
7142 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7143 if (l1 != l2)
7144 return 0;
7145
7146 l1 = lookup_attribute ("cmse_nonsecure_call",
7147 TYPE_ATTRIBUTES (type1)) != NULL;
7148 l2 = lookup_attribute ("cmse_nonsecure_call",
7149 TYPE_ATTRIBUTES (type2)) != NULL;
7150
7151 if (l1 != l2)
7152 return 0;
7153
7154 return 1;
7155 }
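/* Illustrative only: two function types that differ in these attributes
   are treated as incompatible by the hook above, so code such as

       void f (void) __attribute__ ((long_call));
       void (*p) (void) = f;       p's type carries no long_call attribute

   is expected to draw an incompatible-pointer-type diagnostic; the same
   holds for mismatches of isr/interrupt or cmse_nonsecure_call.  */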
7156
7157 /* Assigns default attributes to newly defined type. This is used to
7158 set short_call/long_call attributes for function types of
7159 functions defined inside corresponding #pragma scopes. */
7160 static void
7161 arm_set_default_type_attributes (tree type)
7162 {
7163 /* Add __attribute__ ((long_call)) to all functions when
7164 inside #pragma long_calls, or __attribute__ ((short_call))
7165 when inside #pragma no_long_calls. */
7166 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7167 {
7168 tree type_attr_list, attr_name;
7169 type_attr_list = TYPE_ATTRIBUTES (type);
7170
7171 if (arm_pragma_long_calls == LONG)
7172 attr_name = get_identifier ("long_call");
7173 else if (arm_pragma_long_calls == SHORT)
7174 attr_name = get_identifier ("short_call");
7175 else
7176 return;
7177
7178 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7179 TYPE_ATTRIBUTES (type) = type_attr_list;
7180 }
7181 }
7182 \f
7183 /* Return true if DECL is known to be linked into section SECTION. */
7184
7185 static bool
7186 arm_function_in_section_p (tree decl, section *section)
7187 {
7188 /* We can only be certain about the prevailing symbol definition. */
7189 if (!decl_binds_to_current_def_p (decl))
7190 return false;
7191
7192 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7193 if (!DECL_SECTION_NAME (decl))
7194 {
7195 /* Make sure that we will not create a unique section for DECL. */
7196 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7197 return false;
7198 }
7199
7200 return function_section (decl) == section;
7201 }
7202
7203 /* Return nonzero if a 32-bit "long_call" should be generated for
7204 a call from the current function to DECL. We generate a long_call
7205 if the function:
7206
7207 a. has an __attribute__ ((long_call))
7208 or b. is within the scope of a #pragma long_calls
7209 or c. the -mlong-calls command line switch has been specified
7210
7211 However we do not generate a long call if the function:
7212
7213 d. has an __attribute__ ((short_call))
7214 or e. is inside the scope of a #pragma no_long_calls
7215 or f. is defined in the same section as the current function. */
7216
7217 bool
7218 arm_is_long_call_p (tree decl)
7219 {
7220 tree attrs;
7221
7222 if (!decl)
7223 return TARGET_LONG_CALLS;
7224
7225 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7226 if (lookup_attribute ("short_call", attrs))
7227 return false;
7228
7229 /* For "f", be conservative, and only cater for cases in which the
7230 whole of the current function is placed in the same section. */
7231 if (!flag_reorder_blocks_and_partition
7232 && TREE_CODE (decl) == FUNCTION_DECL
7233 && arm_function_in_section_p (decl, current_function_section ()))
7234 return false;
7235
7236 if (lookup_attribute ("long_call", attrs))
7237 return true;
7238
7239 return TARGET_LONG_CALLS;
7240 }
7241
7242 /* Return nonzero if it is ok to make a tail-call to DECL. */
7243 static bool
7244 arm_function_ok_for_sibcall (tree decl, tree exp)
7245 {
7246 unsigned long func_type;
7247
7248 if (cfun->machine->sibcall_blocked)
7249 return false;
7250
7251 /* Never tailcall something if we are generating code for Thumb-1. */
7252 if (TARGET_THUMB1)
7253 return false;
7254
7255 /* The PIC register is live on entry to VxWorks PLT entries, so we
7256 must make the call before restoring the PIC register. */
7257 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7258 return false;
7259
7260 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7261 may be used both as the target of the call and as the base register for
7262 restoring the VFP registers.  */
7263 if (TARGET_APCS_FRAME && TARGET_ARM
7264 && TARGET_HARD_FLOAT
7265 && decl && arm_is_long_call_p (decl))
7266 return false;
7267
7268 /* If we are interworking and the function is not declared static
7269 then we can't tail-call it unless we know that it exists in this
7270 compilation unit (since it might be a Thumb routine). */
7271 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7272 && !TREE_ASM_WRITTEN (decl))
7273 return false;
7274
7275 func_type = arm_current_func_type ();
7276 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7277 if (IS_INTERRUPT (func_type))
7278 return false;
7279
7280 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7281 generated for entry functions themselves. */
7282 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7283 return false;
7284
7285 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7286 this would complicate matters for later code generation. */
7287 if (TREE_CODE (exp) == CALL_EXPR)
7288 {
7289 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7290 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7291 return false;
7292 }
7293
7294 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7295 {
7296 /* Check that the return value locations are the same. For
7297 example that we aren't returning a value from the sibling in
7298 a VFP register but then need to transfer it to a core
7299 register. */
7300 rtx a, b;
7301 tree decl_or_type = decl;
7302
7303 /* If it is an indirect function pointer, get the function type. */
7304 if (!decl)
7305 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7306
7307 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7308 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7309 cfun->decl, false);
7310 if (!rtx_equal_p (a, b))
7311 return false;
7312 }
7313
7314 /* Never tailcall if function may be called with a misaligned SP. */
7315 if (IS_STACKALIGN (func_type))
7316 return false;
7317
7318 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7319 references should become a NOP. Don't convert such calls into
7320 sibling calls. */
7321 if (TARGET_AAPCS_BASED
7322 && arm_abi == ARM_ABI_AAPCS
7323 && decl
7324 && DECL_WEAK (decl))
7325 return false;
7326
7327 /* We cannot do a tailcall for an indirect call by descriptor if all the
7328 argument registers are used because the only register left to load the
7329 address is IP and it will already contain the static chain. */
7330 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7331 {
7332 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7333 CUMULATIVE_ARGS cum;
7334 cumulative_args_t cum_v;
7335
7336 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7337 cum_v = pack_cumulative_args (&cum);
7338
7339 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7340 {
7341 tree type = TREE_VALUE (t);
7342 if (!VOID_TYPE_P (type))
7343 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7344 }
7345
7346 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7347 return false;
7348 }
7349
7350 /* Everything else is ok. */
7351 return true;
7352 }
7353
7354 \f
7355 /* Addressing mode support functions. */
7356
7357 /* Return nonzero if X is a legitimate immediate operand when compiling
7358 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7359 int
7360 legitimate_pic_operand_p (rtx x)
7361 {
7362 if (GET_CODE (x) == SYMBOL_REF
7363 || (GET_CODE (x) == CONST
7364 && GET_CODE (XEXP (x, 0)) == PLUS
7365 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7366 return 0;
7367
7368 return 1;
7369 }
7370
7371 /* Record that the current function needs a PIC register. Initialize
7372 cfun->machine->pic_reg if we have not already done so. */
7373
7374 static void
7375 require_pic_register (void)
7376 {
7377 /* A lot of the logic here is made obscure by the fact that this
7378 routine gets called as part of the rtx cost estimation process.
7379 We don't want those calls to affect any assumptions about the real
7380 function; and further, we can't call entry_of_function() until we
7381 start the real expansion process. */
7382 if (!crtl->uses_pic_offset_table)
7383 {
7384 gcc_assert (can_create_pseudo_p ());
7385 if (arm_pic_register != INVALID_REGNUM
7386 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7387 {
7388 if (!cfun->machine->pic_reg)
7389 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7390
7391 /* Play games to avoid marking the function as needing pic
7392 if we are being called as part of the cost-estimation
7393 process. */
7394 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7395 crtl->uses_pic_offset_table = 1;
7396 }
7397 else
7398 {
7399 rtx_insn *seq, *insn;
7400
7401 if (!cfun->machine->pic_reg)
7402 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7403
7404 /* Play games to avoid marking the function as needing pic
7405 if we are being called as part of the cost-estimation
7406 process. */
7407 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7408 {
7409 crtl->uses_pic_offset_table = 1;
7410 start_sequence ();
7411
7412 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7413 && arm_pic_register > LAST_LO_REGNUM)
7414 emit_move_insn (cfun->machine->pic_reg,
7415 gen_rtx_REG (Pmode, arm_pic_register));
7416 else
7417 arm_load_pic_register (0UL);
7418
7419 seq = get_insns ();
7420 end_sequence ();
7421
7422 for (insn = seq; insn; insn = NEXT_INSN (insn))
7423 if (INSN_P (insn))
7424 INSN_LOCATION (insn) = prologue_location;
7425
7426 /* We can be called during expansion of PHI nodes, where
7427 we can't yet emit instructions directly in the final
7428 insn stream. Queue the insns on the entry edge; they will
7429 be committed after everything else is expanded. */
7430 insert_insn_on_edge (seq,
7431 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7432 }
7433 }
7434 }
7435 }
7436
7437 rtx
7438 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7439 {
7440 if (GET_CODE (orig) == SYMBOL_REF
7441 || GET_CODE (orig) == LABEL_REF)
7442 {
7443 if (reg == 0)
7444 {
7445 gcc_assert (can_create_pseudo_p ());
7446 reg = gen_reg_rtx (Pmode);
7447 }
7448
7449 /* VxWorks does not impose a fixed gap between segments; the run-time
7450 gap can be different from the object-file gap. We therefore can't
7451 use GOTOFF unless we are absolutely sure that the symbol is in the
7452 same segment as the GOT. Unfortunately, the flexibility of linker
7453 scripts means that we can't be sure of that in general, so assume
7454 that GOTOFF is never valid on VxWorks. */
7455 /* References to weak symbols cannot be resolved locally: they
7456 may be overridden by a non-weak definition at link time. */
7457 rtx_insn *insn;
7458 if ((GET_CODE (orig) == LABEL_REF
7459 || (GET_CODE (orig) == SYMBOL_REF
7460 && SYMBOL_REF_LOCAL_P (orig)
7461 && (SYMBOL_REF_DECL (orig)
7462 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7463 && NEED_GOT_RELOC
7464 && arm_pic_data_is_text_relative)
7465 insn = arm_pic_static_addr (orig, reg);
7466 else
7467 {
7468 rtx pat;
7469 rtx mem;
7470
7471 /* If this function doesn't have a pic register, create one now. */
7472 require_pic_register ();
7473
7474 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7475
7476 /* Make the MEM as close to a constant as possible. */
7477 mem = SET_SRC (pat);
7478 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7479 MEM_READONLY_P (mem) = 1;
7480 MEM_NOTRAP_P (mem) = 1;
7481
7482 insn = emit_insn (pat);
7483 }
7484
7485 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7486 by the loop pass. */
7487 set_unique_reg_note (insn, REG_EQUAL, orig);
7488
7489 return reg;
7490 }
7491 else if (GET_CODE (orig) == CONST)
7492 {
7493 rtx base, offset;
7494
7495 if (GET_CODE (XEXP (orig, 0)) == PLUS
7496 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7497 return orig;
7498
7499 /* Handle the case where we have: const (UNSPEC_TLS). */
7500 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7501 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7502 return orig;
7503
7504 /* Handle the case where we have:
7505 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7506 CONST_INT. */
7507 if (GET_CODE (XEXP (orig, 0)) == PLUS
7508 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7509 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7510 {
7511 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7512 return orig;
7513 }
7514
7515 if (reg == 0)
7516 {
7517 gcc_assert (can_create_pseudo_p ());
7518 reg = gen_reg_rtx (Pmode);
7519 }
7520
7521 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7522
7523 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7524 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7525 base == reg ? 0 : reg);
7526
7527 if (CONST_INT_P (offset))
7528 {
7529 /* The base register doesn't really matter; we only want to
7530 test the index for the appropriate mode. */
7531 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7532 {
7533 gcc_assert (can_create_pseudo_p ());
7534 offset = force_reg (Pmode, offset);
7535 }
7536
7537 if (CONST_INT_P (offset))
7538 return plus_constant (Pmode, base, INTVAL (offset));
7539 }
7540
7541 if (GET_MODE_SIZE (mode) > 4
7542 && (GET_MODE_CLASS (mode) == MODE_INT
7543 || TARGET_SOFT_FLOAT))
7544 {
7545 emit_insn (gen_addsi3 (reg, base, offset));
7546 return reg;
7547 }
7548
7549 return gen_rtx_PLUS (Pmode, base, offset);
7550 }
7551
7552 return orig;
7553 }
7554
7555
7556 /* Find a spare register to use during the prolog of a function. */
7557
7558 static int
7559 thumb_find_work_register (unsigned long pushed_regs_mask)
7560 {
7561 int reg;
7562
7563 /* Check the argument registers first as these are call-used. The
7564 register allocation order means that sometimes r3 might be used
7565 but earlier argument registers might not, so check them all. */
7566 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7567 if (!df_regs_ever_live_p (reg))
7568 return reg;
7569
7570 /* Before going on to check the call-saved registers we can try a couple
7571 more ways of deducing that r3 is available. The first is when we are
7572 pushing anonymous arguments onto the stack and we have less than 4
7573 registers worth of fixed arguments(*). In this case r3 will be part of
7574 the variable argument list and so we can be sure that it will be
7575 pushed right at the start of the function. Hence it will be available
7576 for the rest of the prologue.
7577 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7578 if (cfun->machine->uses_anonymous_args
7579 && crtl->args.pretend_args_size > 0)
7580 return LAST_ARG_REGNUM;
7581
7582 /* The other case is when we have fixed arguments but fewer than 4 registers'
7583 worth. In this case r3 might be used in the body of the function, but
7584 it is not being used to convey an argument into the function. In theory
7585 we could just check crtl->args.size to see how many bytes are
7586 being passed in argument registers, but it seems that it is unreliable.
7587 Sometimes it will have the value 0 when in fact arguments are being
7588 passed. (See testcase execute/20021111-1.c for an example). So we also
7589 check the args_info.nregs field as well. The problem with this field is
7590 that it makes no allowances for arguments that are passed to the
7591 function but which are not used. Hence we could miss an opportunity
7592 when a function has an unused argument in r3. But it is better to be
7593 safe than to be sorry. */
7594 if (! cfun->machine->uses_anonymous_args
7595 && crtl->args.size >= 0
7596 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7597 && (TARGET_AAPCS_BASED
7598 ? crtl->args.info.aapcs_ncrn < 4
7599 : crtl->args.info.nregs < 4))
7600 return LAST_ARG_REGNUM;
7601
7602 /* Otherwise look for a call-saved register that is going to be pushed. */
7603 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7604 if (pushed_regs_mask & (1 << reg))
7605 return reg;
7606
7607 if (TARGET_THUMB2)
7608 {
7609 /* Thumb-2 can use high regs. */
7610 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7611 if (pushed_regs_mask & (1 << reg))
7612 return reg;
7613 }
7614 /* Something went wrong - thumb_compute_save_reg_mask()
7615 should have arranged for a suitable register to be pushed. */
7616 gcc_unreachable ();
7617 }
7618
7619 static GTY(()) int pic_labelno;
7620
7621 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7622 low register. */
7623
7624 void
7625 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7626 {
7627 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7628
7629 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7630 return;
7631
7632 gcc_assert (flag_pic);
7633
7634 pic_reg = cfun->machine->pic_reg;
7635 if (TARGET_VXWORKS_RTP)
7636 {
7637 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7638 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7639 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7640
7641 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7642
7643 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7644 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7645 }
7646 else
7647 {
7648 /* We use an UNSPEC rather than a LABEL_REF because this label
7649 never appears in the code stream. */
7650
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7654
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition; on the Thumb it is 'dot + 4'. */
7657 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7659 UNSPEC_GOTSYM_OFF);
7660 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7661
7662 if (TARGET_32BIT)
7663 {
7664 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7665 }
7666 else /* TARGET_THUMB1 */
7667 {
7668 if (arm_pic_register != INVALID_REGNUM
7669 && REGNO (pic_reg) > LAST_LO_REGNUM)
7670 {
7671 /* We will have pushed the pic register, so we should always be
7672 able to find a work register. */
7673 pic_tmp = gen_rtx_REG (SImode,
7674 thumb_find_work_register (saved_regs));
7675 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7676 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7677 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7678 }
7679 else if (arm_pic_register != INVALID_REGNUM
7680 && arm_pic_register > LAST_LO_REGNUM
7681 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7682 {
7683 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7684 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7685 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7686 }
7687 else
7688 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7689 }
7690 }
7691
7692 /* Need to emit this whether or not we obey regdecls,
7693 since setjmp/longjmp can cause life info to screw up. */
7694 emit_use (pic_reg);
7695 }
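/* For orientation only, the non-VxWorks sequence emitted above has
   roughly this shape in ARM state (labels and register are schematic):

       ldr     rPIC, .Loffset
   .LPICn:
       add     rPIC, pc, rPIC            pc reads as .LPICn + 8 here
       ...
   .Loffset:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   which matches the 'dot + 8' (or 'dot + 4' for Thumb) adjustment applied
   when pic_rtx is built.  */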
7696
7697 /* Generate code to load the address of a static var when flag_pic is set. */
7698 static rtx_insn *
7699 arm_pic_static_addr (rtx orig, rtx reg)
7700 {
7701 rtx l1, labelno, offset_rtx;
7702
7703 gcc_assert (flag_pic);
7704
7705 /* We use an UNSPEC rather than a LABEL_REF because this label
7706 never appears in the code stream. */
7707 labelno = GEN_INT (pic_labelno++);
7708 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7709 l1 = gen_rtx_CONST (VOIDmode, l1);
7710
7711 /* On the ARM the PC register contains 'dot + 8' at the time of the
7712 addition; on the Thumb it is 'dot + 4'. */
7713 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7714 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7715 UNSPEC_SYMBOL_OFFSET);
7716 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7717
7718 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7719 }
7720
7721 /* Return nonzero if X is valid as an ARM state addressing register. */
7722 static int
7723 arm_address_register_rtx_p (rtx x, int strict_p)
7724 {
7725 int regno;
7726
7727 if (!REG_P (x))
7728 return 0;
7729
7730 regno = REGNO (x);
7731
7732 if (strict_p)
7733 return ARM_REGNO_OK_FOR_BASE_P (regno);
7734
7735 return (regno <= LAST_ARM_REGNUM
7736 || regno >= FIRST_PSEUDO_REGISTER
7737 || regno == FRAME_POINTER_REGNUM
7738 || regno == ARG_POINTER_REGNUM);
7739 }
7740
7741 /* Return TRUE if this rtx is the difference of a symbol and a label,
7742 and will reduce to a PC-relative relocation in the object file.
7743 Expressions like this can be left alone when generating PIC, rather
7744 than forced through the GOT. */
7745 static int
7746 pcrel_constant_p (rtx x)
7747 {
7748 if (GET_CODE (x) == MINUS)
7749 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7750
7751 return FALSE;
7752 }
7753
7754 /* Return true if X will surely end up in an index register after next
7755 splitting pass. */
7756 static bool
7757 will_be_in_index_register (const_rtx x)
7758 {
7759 /* arm.md: calculate_pic_address will split this into a register. */
7760 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7761 }
7762
7763 /* Return nonzero if X is a valid ARM state address operand. */
7764 int
7765 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7766 int strict_p)
7767 {
7768 bool use_ldrd;
7769 enum rtx_code code = GET_CODE (x);
7770
7771 if (arm_address_register_rtx_p (x, strict_p))
7772 return 1;
7773
7774 use_ldrd = (TARGET_LDRD
7775 && (mode == DImode || mode == DFmode));
7776
7777 if (code == POST_INC || code == PRE_DEC
7778 || ((code == PRE_INC || code == POST_DEC)
7779 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7780 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7781
7782 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7783 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7784 && GET_CODE (XEXP (x, 1)) == PLUS
7785 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7786 {
7787 rtx addend = XEXP (XEXP (x, 1), 1);
7788
7789 /* Don't allow ldrd post increment by register because it's hard
7790 to fix up invalid register choices. */
7791 if (use_ldrd
7792 && GET_CODE (x) == POST_MODIFY
7793 && REG_P (addend))
7794 return 0;
7795
7796 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7797 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7798 }
7799
7800 /* After reload constants split into minipools will have addresses
7801 from a LABEL_REF. */
7802 else if (reload_completed
7803 && (code == LABEL_REF
7804 || (code == CONST
7805 && GET_CODE (XEXP (x, 0)) == PLUS
7806 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7807 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7808 return 1;
7809
7810 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7811 return 0;
7812
7813 else if (code == PLUS)
7814 {
7815 rtx xop0 = XEXP (x, 0);
7816 rtx xop1 = XEXP (x, 1);
7817
7818 return ((arm_address_register_rtx_p (xop0, strict_p)
7819 && ((CONST_INT_P (xop1)
7820 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7821 || (!strict_p && will_be_in_index_register (xop1))))
7822 || (arm_address_register_rtx_p (xop1, strict_p)
7823 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7824 }
7825
7826 #if 0
7827 /* Reload currently can't handle MINUS, so disable this for now */
7828 else if (GET_CODE (x) == MINUS)
7829 {
7830 rtx xop0 = XEXP (x, 0);
7831 rtx xop1 = XEXP (x, 1);
7832
7833 return (arm_address_register_rtx_p (xop0, strict_p)
7834 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7835 }
7836 #endif
7837
7838 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7839 && code == SYMBOL_REF
7840 && CONSTANT_POOL_ADDRESS_P (x)
7841 && ! (flag_pic
7842 && symbol_mentioned_p (get_pool_constant (x))
7843 && ! pcrel_constant_p (get_pool_constant (x))))
7844 return 1;
7845
7846 return 0;
7847 }
7848
7849 /* Return true if we can avoid creating a constant pool entry for x. */
7850 static bool
7851 can_avoid_literal_pool_for_label_p (rtx x)
7852 {
7853 /* Normally we can assign constant values to target registers without
7854 the help of the constant pool. But there are cases where we have to
7855 use the constant pool, for example:
7856 1) assigning a label to a register;
7857 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7858 
7859 A constant pool access of the form:
7860 (set (reg r0) (mem (symbol_ref (".LC0"))))
7861 will cause the use of the literal pool (later, in arm_reorg).
7862 So here we mark such a form as invalid; the compiler will then
7863 adjust it into:
7864 (set (reg r0) (symbol_ref (".LC0")))
7865 (set (reg r0) (mem (reg r0))).
7866 No extra register is required, and (mem (reg r0)) won't cause the use
7867 of literal pools. */
7868 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7869 && CONSTANT_POOL_ADDRESS_P (x))
7870 return 1;
7871 return 0;
7872 }
7873
7874
7875 /* Return nonzero if X is a valid Thumb-2 address operand. */
7876 static int
7877 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7878 {
7879 bool use_ldrd;
7880 enum rtx_code code = GET_CODE (x);
7881
7882 if (arm_address_register_rtx_p (x, strict_p))
7883 return 1;
7884
7885 use_ldrd = (TARGET_LDRD
7886 && (mode == DImode || mode == DFmode));
7887
7888 if (code == POST_INC || code == PRE_DEC
7889 || ((code == PRE_INC || code == POST_DEC)
7890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7892
7893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7895 && GET_CODE (XEXP (x, 1)) == PLUS
7896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7897 {
7898 /* Thumb-2 only has autoincrement by constant. */
7899 rtx addend = XEXP (XEXP (x, 1), 1);
7900 HOST_WIDE_INT offset;
7901
7902 if (!CONST_INT_P (addend))
7903 return 0;
7904
7905 offset = INTVAL (addend);
7906 if (GET_MODE_SIZE (mode) <= 4)
7907 return (offset > -256 && offset < 256);
7908
7909 return (use_ldrd && offset > -1024 && offset < 1024
7910 && (offset & 3) == 0);
7911 }
7912
7913 /* After reload constants split into minipools will have addresses
7914 from a LABEL_REF. */
7915 else if (reload_completed
7916 && (code == LABEL_REF
7917 || (code == CONST
7918 && GET_CODE (XEXP (x, 0)) == PLUS
7919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7920 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7921 return 1;
7922
7923 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7924 return 0;
7925
7926 else if (code == PLUS)
7927 {
7928 rtx xop0 = XEXP (x, 0);
7929 rtx xop1 = XEXP (x, 1);
7930
7931 return ((arm_address_register_rtx_p (xop0, strict_p)
7932 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7933 || (!strict_p && will_be_in_index_register (xop1))))
7934 || (arm_address_register_rtx_p (xop1, strict_p)
7935 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7936 }
7937
7938 else if (can_avoid_literal_pool_for_label_p (x))
7939 return 0;
7940
7941 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7942 && code == SYMBOL_REF
7943 && CONSTANT_POOL_ADDRESS_P (x)
7944 && ! (flag_pic
7945 && symbol_mentioned_p (get_pool_constant (x))
7946 && ! pcrel_constant_p (get_pool_constant (x))))
7947 return 1;
7948
7949 return 0;
7950 }
7951
7952 /* Return nonzero if INDEX is valid for an address index operand in
7953 ARM state. */
7954 static int
7955 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7956 int strict_p)
7957 {
7958 HOST_WIDE_INT range;
7959 enum rtx_code code = GET_CODE (index);
7960
7961 /* Standard coprocessor addressing modes. */
7962 if (TARGET_HARD_FLOAT
7963 && (mode == SFmode || mode == DFmode))
7964 return (code == CONST_INT && INTVAL (index) < 1024
7965 && INTVAL (index) > -1024
7966 && (INTVAL (index) & 3) == 0);
7967
7968 /* For quad modes, we restrict the constant offset to be slightly less
7969 than what the instruction format permits. We do this because for
7970 quad mode moves, we will actually decompose them into two separate
7971 double-mode reads or writes. INDEX must therefore be a valid
7972 (double-mode) offset and so should INDEX+8. */
7973 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7974 return (code == CONST_INT
7975 && INTVAL (index) < 1016
7976 && INTVAL (index) > -1024
7977 && (INTVAL (index) & 3) == 0);
7978
7979 /* We have no such constraint on double mode offsets, so we permit the
7980 full range of the instruction format. */
7981 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7982 return (code == CONST_INT
7983 && INTVAL (index) < 1024
7984 && INTVAL (index) > -1024
7985 && (INTVAL (index) & 3) == 0);
7986
7987 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7988 return (code == CONST_INT
7989 && INTVAL (index) < 1024
7990 && INTVAL (index) > -1024
7991 && (INTVAL (index) & 3) == 0);
7992
7993 if (arm_address_register_rtx_p (index, strict_p)
7994 && (GET_MODE_SIZE (mode) <= 4))
7995 return 1;
7996
7997 if (mode == DImode || mode == DFmode)
7998 {
7999 if (code == CONST_INT)
8000 {
8001 HOST_WIDE_INT val = INTVAL (index);
8002
8003 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8004 If vldr is selected it uses arm_coproc_mem_operand. */
8005 if (TARGET_LDRD)
8006 return val > -256 && val < 256;
8007 else
8008 return val > -4096 && val < 4092;
8009 }
8010
8011 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8012 }
8013
8014 if (GET_MODE_SIZE (mode) <= 4
8015 && ! (arm_arch4
8016 && (mode == HImode
8017 || mode == HFmode
8018 || (mode == QImode && outer == SIGN_EXTEND))))
8019 {
8020 if (code == MULT)
8021 {
8022 rtx xiop0 = XEXP (index, 0);
8023 rtx xiop1 = XEXP (index, 1);
8024
8025 return ((arm_address_register_rtx_p (xiop0, strict_p)
8026 && power_of_two_operand (xiop1, SImode))
8027 || (arm_address_register_rtx_p (xiop1, strict_p)
8028 && power_of_two_operand (xiop0, SImode)));
8029 }
8030 else if (code == LSHIFTRT || code == ASHIFTRT
8031 || code == ASHIFT || code == ROTATERT)
8032 {
8033 rtx op = XEXP (index, 1);
8034
8035 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8036 && CONST_INT_P (op)
8037 && INTVAL (op) > 0
8038 && INTVAL (op) <= 31);
8039 }
8040 }
8041
8042 /* For ARM v4 we may be doing a sign-extend operation during the
8043 load. */
8044 if (arm_arch4)
8045 {
8046 if (mode == HImode
8047 || mode == HFmode
8048 || (outer == SIGN_EXTEND && mode == QImode))
8049 range = 256;
8050 else
8051 range = 4096;
8052 }
8053 else
8054 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8055
8056 return (code == CONST_INT
8057 && INTVAL (index) < range
8058 && INTVAL (index) > -range);
8059 }
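/* Illustrative only: addresses accepted by the checks above include, for
   example (assuming suitable base and index registers):

       ldr  r0, [r1, #4095]          SImode immediate in (-4096, 4096)
       ldr  r0, [r1, r2]             register index
       ldr  r0, [r1, r2, lsl #2]     index scaled by a power of two
       ldrh r0, [r1, #255]           HImode on ARMv4+, range (-256, 256)

   DImode/DFmode accesses are limited to the ldrd-style (-256, 256)
   immediate range when TARGET_LDRD, as handled above.  */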
8060
8061 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8062 index operand. i.e. 1, 2, 4 or 8. */
8063 static bool
8064 thumb2_index_mul_operand (rtx op)
8065 {
8066 HOST_WIDE_INT val;
8067
8068 if (!CONST_INT_P (op))
8069 return false;
8070
8071 val = INTVAL (op);
8072 return (val == 1 || val == 2 || val == 4 || val == 8);
8073 }
8074
8075 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8076 static int
8077 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8078 {
8079 enum rtx_code code = GET_CODE (index);
8080
8081 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8082 /* Standard coprocessor addressing modes. */
8083 if (TARGET_HARD_FLOAT
8084 && (mode == SFmode || mode == DFmode))
8085 return (code == CONST_INT && INTVAL (index) < 1024
8086 /* Thumb-2 allows only > -256 index range for its core register
8087 load/stores. Since we allow SF/DF in core registers, we have
8088 to use the intersection between -256~4096 (core) and -1024~1024
8089 (coprocessor). */
8090 && INTVAL (index) > -256
8091 && (INTVAL (index) & 3) == 0);
8092
8093 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8094 {
8095 /* For DImode assume values will usually live in core regs
8096 and only allow LDRD addressing modes. */
8097 if (!TARGET_LDRD || mode != DImode)
8098 return (code == CONST_INT
8099 && INTVAL (index) < 1024
8100 && INTVAL (index) > -1024
8101 && (INTVAL (index) & 3) == 0);
8102 }
8103
8104 /* For quad modes, we restrict the constant offset to be slightly less
8105 than what the instruction format permits. We do this because for
8106 quad mode moves, we will actually decompose them into two separate
8107 double-mode reads or writes. INDEX must therefore be a valid
8108 (double-mode) offset and so should INDEX+8. */
8109 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8110 return (code == CONST_INT
8111 && INTVAL (index) < 1016
8112 && INTVAL (index) > -1024
8113 && (INTVAL (index) & 3) == 0);
8114
8115 /* We have no such constraint on double mode offsets, so we permit the
8116 full range of the instruction format. */
8117 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8118 return (code == CONST_INT
8119 && INTVAL (index) < 1024
8120 && INTVAL (index) > -1024
8121 && (INTVAL (index) & 3) == 0);
8122
8123 if (arm_address_register_rtx_p (index, strict_p)
8124 && (GET_MODE_SIZE (mode) <= 4))
8125 return 1;
8126
8127 if (mode == DImode || mode == DFmode)
8128 {
8129 if (code == CONST_INT)
8130 {
8131 HOST_WIDE_INT val = INTVAL (index);
8132 /* Thumb-2 ldrd only has reg+const addressing modes.
8133 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8134 If vldr is selected it uses arm_coproc_mem_operand. */
8135 if (TARGET_LDRD)
8136 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8137 else
8138 return IN_RANGE (val, -255, 4095 - 4);
8139 }
8140 else
8141 return 0;
8142 }
8143
8144 if (code == MULT)
8145 {
8146 rtx xiop0 = XEXP (index, 0);
8147 rtx xiop1 = XEXP (index, 1);
8148
8149 return ((arm_address_register_rtx_p (xiop0, strict_p)
8150 && thumb2_index_mul_operand (xiop1))
8151 || (arm_address_register_rtx_p (xiop1, strict_p)
8152 && thumb2_index_mul_operand (xiop0)));
8153 }
8154 else if (code == ASHIFT)
8155 {
8156 rtx op = XEXP (index, 1);
8157
8158 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8159 && CONST_INT_P (op)
8160 && INTVAL (op) > 0
8161 && INTVAL (op) <= 3);
8162 }
8163
8164 return (code == CONST_INT
8165 && INTVAL (index) < 4096
8166 && INTVAL (index) > -256);
8167 }
8168
8169 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8170 static int
8171 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8172 {
8173 int regno;
8174
8175 if (!REG_P (x))
8176 return 0;
8177
8178 regno = REGNO (x);
8179
8180 if (strict_p)
8181 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8182
8183 return (regno <= LAST_LO_REGNUM
8184 || regno > LAST_VIRTUAL_REGISTER
8185 || regno == FRAME_POINTER_REGNUM
8186 || (GET_MODE_SIZE (mode) >= 4
8187 && (regno == STACK_POINTER_REGNUM
8188 || regno >= FIRST_PSEUDO_REGISTER
8189 || x == hard_frame_pointer_rtx
8190 || x == arg_pointer_rtx)));
8191 }
8192
8193 /* Return nonzero if x is a legitimate index register. This is the case
8194 for any base register that can access a QImode object. */
8195 inline static int
8196 thumb1_index_register_rtx_p (rtx x, int strict_p)
8197 {
8198 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8199 }
8200
8201 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8202
8203 The AP may be eliminated to either the SP or the FP, so we use the
8204 least common denominator, e.g. SImode, and offsets from 0 to 64.
8205
8206 ??? Verify whether the above is the right approach.
8207
8208 ??? Also, the FP may be eliminated to the SP, so perhaps that
8209 needs special handling also.
8210
8211 ??? Look at how the mips16 port solves this problem. It probably uses
8212 better ways to solve some of these problems.
8213
8214 Although it is not incorrect, we don't accept QImode and HImode
8215 addresses based on the frame pointer or arg pointer until the
8216 reload pass starts. This is so that eliminating such addresses
8217 into stack based ones won't produce impossible code. */
8218 int
8219 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8220 {
8221 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8222 return 0;
8223
8224 /* ??? Not clear if this is right. Experiment. */
8225 if (GET_MODE_SIZE (mode) < 4
8226 && !(reload_in_progress || reload_completed)
8227 && (reg_mentioned_p (frame_pointer_rtx, x)
8228 || reg_mentioned_p (arg_pointer_rtx, x)
8229 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8230 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8231 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8232 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8233 return 0;
8234
8235 /* Accept any base register. SP only in SImode or larger. */
8236 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8237 return 1;
8238
8239 /* This is PC relative data before arm_reorg runs. */
8240 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8241 && GET_CODE (x) == SYMBOL_REF
8242 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8243 return 1;
8244
8245 /* This is PC relative data after arm_reorg runs. */
8246 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8247 && reload_completed
8248 && (GET_CODE (x) == LABEL_REF
8249 || (GET_CODE (x) == CONST
8250 && GET_CODE (XEXP (x, 0)) == PLUS
8251 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8252 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8253 return 1;
8254
8255 /* Post-inc indexing only supported for SImode and larger. */
8256 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8257 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8258 return 1;
8259
8260 else if (GET_CODE (x) == PLUS)
8261 {
8262 /* REG+REG address can be any two index registers. */
8263 /* We disallow FRAME+REG addressing since we know that FRAME
8264 will be replaced with STACK, and SP relative addressing only
8265 permits SP+OFFSET. */
8266 if (GET_MODE_SIZE (mode) <= 4
8267 && XEXP (x, 0) != frame_pointer_rtx
8268 && XEXP (x, 1) != frame_pointer_rtx
8269 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8270 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8271 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8272 return 1;
8273
8274 /* REG+const has 5-7 bit offset for non-SP registers. */
8275 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8276 || XEXP (x, 0) == arg_pointer_rtx)
8277 && CONST_INT_P (XEXP (x, 1))
8278 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8279 return 1;
8280
8281 /* REG+const has 10-bit offset for SP, but only SImode and
8282 larger is supported. */
8283 /* ??? Should probably check for DI/DFmode overflow here
8284 just like GO_IF_LEGITIMATE_OFFSET does. */
8285 else if (REG_P (XEXP (x, 0))
8286 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8287 && GET_MODE_SIZE (mode) >= 4
8288 && CONST_INT_P (XEXP (x, 1))
8289 && INTVAL (XEXP (x, 1)) >= 0
8290 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8291 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8292 return 1;
8293
8294 else if (REG_P (XEXP (x, 0))
8295 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8296 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8297 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8298 && REGNO (XEXP (x, 0))
8299 <= LAST_VIRTUAL_POINTER_REGISTER))
8300 && GET_MODE_SIZE (mode) >= 4
8301 && CONST_INT_P (XEXP (x, 1))
8302 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8303 return 1;
8304 }
8305
8306 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8307 && GET_MODE_SIZE (mode) == 4
8308 && GET_CODE (x) == SYMBOL_REF
8309 && CONSTANT_POOL_ADDRESS_P (x)
8310 && ! (flag_pic
8311 && symbol_mentioned_p (get_pool_constant (x))
8312 && ! pcrel_constant_p (get_pool_constant (x))))
8313 return 1;
8314
8315 return 0;
8316 }
8317
8318 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8319 instruction of mode MODE. */
8320 int
8321 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8322 {
8323 switch (GET_MODE_SIZE (mode))
8324 {
8325 case 1:
8326 return val >= 0 && val < 32;
8327
8328 case 2:
8329 return val >= 0 && val < 64 && (val & 1) == 0;
8330
8331 default:
8332 return (val >= 0
8333 && (val + GET_MODE_SIZE (mode)) <= 128
8334 && (val & 3) == 0);
8335 }
8336 }
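/* A worked example of the ranges above: a QImode access accepts offsets
   0..31, an HImode access 0..62 in steps of 2, and a 4-byte (SImode)
   access 0..124 in steps of 4; an 8-byte access correspondingly tops out
   at 120, since offset + size must not exceed 128.  */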
8337
8338 bool
8339 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8340 {
8341 if (TARGET_ARM)
8342 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8343 else if (TARGET_THUMB2)
8344 return thumb2_legitimate_address_p (mode, x, strict_p);
8345 else /* if (TARGET_THUMB1) */
8346 return thumb1_legitimate_address_p (mode, x, strict_p);
8347 }
8348
8349 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8350
8351 Given an rtx X being reloaded into a reg required to be
8352 in class CLASS, return the class of reg to actually use.
8353 In general this is just CLASS, but for the Thumb core registers and
8354 immediate constants we prefer a LO_REGS class or a subset. */
8355
8356 static reg_class_t
8357 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8358 {
8359 if (TARGET_32BIT)
8360 return rclass;
8361 else
8362 {
8363 if (rclass == GENERAL_REGS)
8364 return LO_REGS;
8365 else
8366 return rclass;
8367 }
8368 }
8369
8370 /* Build the SYMBOL_REF for __tls_get_addr. */
8371
8372 static GTY(()) rtx tls_get_addr_libfunc;
8373
8374 static rtx
8375 get_tls_get_addr (void)
8376 {
8377 if (!tls_get_addr_libfunc)
8378 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8379 return tls_get_addr_libfunc;
8380 }
8381
8382 rtx
8383 arm_load_tp (rtx target)
8384 {
8385 if (!target)
8386 target = gen_reg_rtx (SImode);
8387
8388 if (TARGET_HARD_TP)
8389 {
8390 /* Can return in any reg. */
8391 emit_insn (gen_load_tp_hard (target));
8392 }
8393 else
8394 {
8395 /* Always returned in r0. Immediately copy the result into a pseudo,
8396 otherwise other uses of r0 (e.g. setting up function arguments) may
8397 clobber the value. */
8398
8399 rtx tmp;
8400
8401 emit_insn (gen_load_tp_soft ());
8402
8403 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8404 emit_move_insn (target, tmp);
8405 }
8406 return target;
8407 }
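/* Illustrative only: with a hardware thread register the load above is a
   single coprocessor read of TPIDRURO (mrc p15, 0, <reg>, c13, c0, 3),
   while the soft variant calls the __aeabi_read_tp helper, which returns
   the thread pointer in r0 -- hence the copy out of r0 above.  */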
8408
8409 static rtx
8410 load_tls_operand (rtx x, rtx reg)
8411 {
8412 rtx tmp;
8413
8414 if (reg == NULL_RTX)
8415 reg = gen_reg_rtx (SImode);
8416
8417 tmp = gen_rtx_CONST (SImode, x);
8418
8419 emit_move_insn (reg, tmp);
8420
8421 return reg;
8422 }
8423
8424 static rtx_insn *
8425 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8426 {
8427 rtx label, labelno, sum;
8428
8429 gcc_assert (reloc != TLS_DESCSEQ);
8430 start_sequence ();
8431
8432 labelno = GEN_INT (pic_labelno++);
8433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8434 label = gen_rtx_CONST (VOIDmode, label);
8435
8436 sum = gen_rtx_UNSPEC (Pmode,
8437 gen_rtvec (4, x, GEN_INT (reloc), label,
8438 GEN_INT (TARGET_ARM ? 8 : 4)),
8439 UNSPEC_TLS);
8440 reg = load_tls_operand (sum, reg);
8441
8442 if (TARGET_ARM)
8443 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8444 else
8445 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8446
8447 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8448 LCT_PURE, /* LCT_CONST? */
8449 Pmode, reg, Pmode);
8450
8451 rtx_insn *insns = get_insns ();
8452 end_sequence ();
8453
8454 return insns;
8455 }
8456
8457 static rtx
8458 arm_tls_descseq_addr (rtx x, rtx reg)
8459 {
8460 rtx labelno = GEN_INT (pic_labelno++);
8461 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8462 rtx sum = gen_rtx_UNSPEC (Pmode,
8463 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8464 gen_rtx_CONST (VOIDmode, label),
8465 GEN_INT (!TARGET_ARM)),
8466 UNSPEC_TLS);
8467 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8468
8469 emit_insn (gen_tlscall (x, labelno));
8470 if (!reg)
8471 reg = gen_reg_rtx (SImode);
8472 else
8473 gcc_assert (REGNO (reg) != R0_REGNUM);
8474
8475 emit_move_insn (reg, reg0);
8476
8477 return reg;
8478 }
8479
8480 rtx
8481 legitimize_tls_address (rtx x, rtx reg)
8482 {
8483 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8484 rtx_insn *insns;
8485 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8486
8487 switch (model)
8488 {
8489 case TLS_MODEL_GLOBAL_DYNAMIC:
8490 if (TARGET_GNU2_TLS)
8491 {
8492 reg = arm_tls_descseq_addr (x, reg);
8493
8494 tp = arm_load_tp (NULL_RTX);
8495
8496 dest = gen_rtx_PLUS (Pmode, tp, reg);
8497 }
8498 else
8499 {
8500 /* Original scheme */
8501 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8502 dest = gen_reg_rtx (Pmode);
8503 emit_libcall_block (insns, dest, ret, x);
8504 }
8505 return dest;
8506
8507 case TLS_MODEL_LOCAL_DYNAMIC:
8508 if (TARGET_GNU2_TLS)
8509 {
8510 reg = arm_tls_descseq_addr (x, reg);
8511
8512 tp = arm_load_tp (NULL_RTX);
8513
8514 dest = gen_rtx_PLUS (Pmode, tp, reg);
8515 }
8516 else
8517 {
8518 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8519
8520 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8521 share the LDM result with other LD model accesses. */
8522 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8523 UNSPEC_TLS);
8524 dest = gen_reg_rtx (Pmode);
8525 emit_libcall_block (insns, dest, ret, eqv);
8526
8527 /* Load the addend. */
8528 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8529 GEN_INT (TLS_LDO32)),
8530 UNSPEC_TLS);
8531 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8532 dest = gen_rtx_PLUS (Pmode, dest, addend);
8533 }
8534 return dest;
8535
8536 case TLS_MODEL_INITIAL_EXEC:
8537 labelno = GEN_INT (pic_labelno++);
8538 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8539 label = gen_rtx_CONST (VOIDmode, label);
8540 sum = gen_rtx_UNSPEC (Pmode,
8541 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8542 GEN_INT (TARGET_ARM ? 8 : 4)),
8543 UNSPEC_TLS);
8544 reg = load_tls_operand (sum, reg);
8545
8546 if (TARGET_ARM)
8547 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8548 else if (TARGET_THUMB2)
8549 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8550 else
8551 {
8552 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8553 emit_move_insn (reg, gen_const_mem (SImode, reg));
8554 }
8555
8556 tp = arm_load_tp (NULL_RTX);
8557
8558 return gen_rtx_PLUS (Pmode, tp, reg);
8559
8560 case TLS_MODEL_LOCAL_EXEC:
8561 tp = arm_load_tp (NULL_RTX);
8562
8563 reg = gen_rtx_UNSPEC (Pmode,
8564 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8565 UNSPEC_TLS);
8566 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8567
8568 return gen_rtx_PLUS (Pmode, tp, reg);
8569
8570 default:
8571 abort ();
8572 }
8573 }
8574
8575 /* Try machine-dependent ways of modifying an illegitimate address
8576 to be legitimate. If we find one, return the new, valid address. */
8577 rtx
8578 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8579 {
8580 if (arm_tls_referenced_p (x))
8581 {
8582 rtx addend = NULL;
8583
8584 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8585 {
8586 addend = XEXP (XEXP (x, 0), 1);
8587 x = XEXP (XEXP (x, 0), 0);
8588 }
8589
8590 if (GET_CODE (x) != SYMBOL_REF)
8591 return x;
8592
8593 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8594
8595 x = legitimize_tls_address (x, NULL_RTX);
8596
8597 if (addend)
8598 {
8599 x = gen_rtx_PLUS (SImode, x, addend);
8600 orig_x = x;
8601 }
8602 else
8603 return x;
8604 }
8605
8606 if (!TARGET_ARM)
8607 {
8608 /* TODO: legitimize_address for Thumb2. */
8609 if (TARGET_THUMB2)
8610 return x;
8611 return thumb_legitimize_address (x, orig_x, mode);
8612 }
8613
8614 if (GET_CODE (x) == PLUS)
8615 {
8616 rtx xop0 = XEXP (x, 0);
8617 rtx xop1 = XEXP (x, 1);
8618
8619 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8620 xop0 = force_reg (SImode, xop0);
8621
8622 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8623 && !symbol_mentioned_p (xop1))
8624 xop1 = force_reg (SImode, xop1);
8625
8626 if (ARM_BASE_REGISTER_RTX_P (xop0)
8627 && CONST_INT_P (xop1))
8628 {
8629 HOST_WIDE_INT n, low_n;
8630 rtx base_reg, val;
8631 n = INTVAL (xop1);
8632
8633 /* VFP addressing modes actually allow greater offsets, but for
8634 now we just stick with the lowest common denominator. */
8635 if (mode == DImode || mode == DFmode)
8636 {
8637 low_n = n & 0x0f;
8638 n &= ~0x0f;
8639 if (low_n > 4)
8640 {
8641 n += 16;
8642 low_n -= 16;
8643 }
8644 }
8645 else
8646 {
8647 low_n = ((mode) == TImode ? 0
8648 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8649 n -= low_n;
8650 }
8651
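/* For example, for an SImode access the offset 4100 is split into a
base adjustment of 4096 and a residual offset of 4 that fits the
12-bit immediate range of the load.  */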
8652 base_reg = gen_reg_rtx (SImode);
8653 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8654 emit_move_insn (base_reg, val);
8655 x = plus_constant (Pmode, base_reg, low_n);
8656 }
8657 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8658 x = gen_rtx_PLUS (SImode, xop0, xop1);
8659 }
8660
8661 /* XXX We don't allow MINUS any more -- see comment in
8662 arm_legitimate_address_outer_p (). */
8663 else if (GET_CODE (x) == MINUS)
8664 {
8665 rtx xop0 = XEXP (x, 0);
8666 rtx xop1 = XEXP (x, 1);
8667
8668 if (CONSTANT_P (xop0))
8669 xop0 = force_reg (SImode, xop0);
8670
8671 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8672 xop1 = force_reg (SImode, xop1);
8673
8674 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8675 x = gen_rtx_MINUS (SImode, xop0, xop1);
8676 }
8677
8678 /* Make sure to take full advantage of the pre-indexed addressing mode
8679 with absolute addresses which often allows for the base register to
8680 be factorized for multiple adjacent memory references, and it might
8681 even allow the minipool to be avoided entirely. */
8682 else if (CONST_INT_P (x) && optimize > 0)
8683 {
8684 unsigned int bits;
8685 HOST_WIDE_INT mask, base, index;
8686 rtx base_reg;
8687
8688 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8689 use an 8-bit index. So let's use a 12-bit index for SImode only and
8690 hope that arm_gen_constant will enable ldrb to use more bits. */
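/* For example, with a 12-bit index an SImode load from the absolute
address 0x12345678 is rebased as 0x12345000 + 0x678, letting nearby
accesses share the 0x12345000 base register.  */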
8691 bits = (mode == SImode) ? 12 : 8;
8692 mask = (1 << bits) - 1;
8693 base = INTVAL (x) & ~mask;
8694 index = INTVAL (x) & mask;
8695 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8696 {
8697 /* It'll most probably be more efficient to generate the base
8698 with more bits set and use a negative index instead. */
8699 base |= mask;
8700 index -= mask;
8701 }
8702 base_reg = force_reg (SImode, GEN_INT (base));
8703 x = plus_constant (Pmode, base_reg, index);
8704 }
8705
8706 if (flag_pic)
8707 {
8708 /* We need to find and carefully transform any SYMBOL and LABEL
8709 references; so go back to the original address expression. */
8710 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8711
8712 if (new_x != orig_x)
8713 x = new_x;
8714 }
8715
8716 return x;
8717 }
8718
8719
8720 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8721 to be legitimate. If we find one, return the new, valid address. */
8722 rtx
8723 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8724 {
8725 if (GET_CODE (x) == PLUS
8726 && CONST_INT_P (XEXP (x, 1))
8727 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8728 || INTVAL (XEXP (x, 1)) < 0))
8729 {
8730 rtx xop0 = XEXP (x, 0);
8731 rtx xop1 = XEXP (x, 1);
8732 HOST_WIDE_INT offset = INTVAL (xop1);
8733
8734 /* Try to fold the offset into a biasing of the base register and
8735 then offsetting that. Don't do this when optimizing for space
8736 since it can cause too many CSEs. */
8737 if (optimize_size && offset >= 0
8738 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8739 {
8740 HOST_WIDE_INT delta;
8741
8742 if (offset >= 256)
8743 delta = offset - (256 - GET_MODE_SIZE (mode));
8744 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8745 delta = 31 * GET_MODE_SIZE (mode);
8746 else
8747 delta = offset & (~31 * GET_MODE_SIZE (mode));
8748
8749 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8750 NULL_RTX);
8751 x = plus_constant (Pmode, xop0, delta);
8752 }
8753 else if (offset < 0 && offset > -256)
8754 /* Small negative offsets are best done with a subtract before the
8755 dereference; forcing these into a register normally takes two
8756 instructions. */
8757 x = force_operand (x, NULL_RTX);
8758 else
8759 {
8760 /* For the remaining cases, force the constant into a register. */
8761 xop1 = force_reg (SImode, xop1);
8762 x = gen_rtx_PLUS (SImode, xop0, xop1);
8763 }
8764 }
8765 else if (GET_CODE (x) == PLUS
8766 && s_register_operand (XEXP (x, 1), SImode)
8767 && !s_register_operand (XEXP (x, 0), SImode))
8768 {
8769 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8770
8771 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8772 }
8773
8774 if (flag_pic)
8775 {
8776 /* We need to find and carefully transform any SYMBOL and LABEL
8777 references; so go back to the original address expression. */
8778 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8779
8780 if (new_x != orig_x)
8781 x = new_x;
8782 }
8783
8784 return x;
8785 }
8786
8787 /* Return TRUE if X contains any TLS symbol references. */
8788
8789 bool
8790 arm_tls_referenced_p (rtx x)
8791 {
8792 if (! TARGET_HAVE_TLS)
8793 return false;
8794
8795 subrtx_iterator::array_type array;
8796 FOR_EACH_SUBRTX (iter, array, x, ALL)
8797 {
8798 const_rtx x = *iter;
8799 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8800 {
8801 /* ARM currently does not provide relocations to encode TLS variables
8802 into AArch32 instructions, only data, so there is currently no way
8803 to implement these if a literal pool is disabled. */
8804 if (arm_disable_literal_pool)
8805 sorry ("accessing thread-local storage is not currently supported "
8806 "with -mpure-code or -mslow-flash-data");
8807
8808 return true;
8809 }
8810
8811 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8812 TLS offsets, not real symbol references. */
8813 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8814 iter.skip_subrtxes ();
8815 }
8816 return false;
8817 }
8818
8819 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8820
8821 On the ARM, allow any integer (invalid ones are removed later by insn
8822 patterns), nice doubles and symbol_refs which refer to the function's
8823 constant pool XXX.
8824
8825 When generating pic allow anything. */
8826
8827 static bool
8828 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8829 {
8830 return flag_pic || !label_mentioned_p (x);
8831 }
8832
8833 static bool
8834 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8835 {
8836 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8837 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8838 for ARMv8-M Baseline or later the result is valid. */
8839 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8840 x = XEXP (x, 0);
8841
8842 return (CONST_INT_P (x)
8843 || CONST_DOUBLE_P (x)
8844 || CONSTANT_ADDRESS_P (x)
8845 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8846 || flag_pic);
8847 }
8848
8849 static bool
8850 arm_legitimate_constant_p (machine_mode mode, rtx x)
8851 {
8852 return (!arm_cannot_force_const_mem (mode, x)
8853 && (TARGET_32BIT
8854 ? arm_legitimate_constant_p_1 (mode, x)
8855 : thumb_legitimate_constant_p (mode, x)));
8856 }
8857
8858 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8859
8860 static bool
8861 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8862 {
8863 rtx base, offset;
8864
8865 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8866 {
8867 split_const (x, &base, &offset);
8868 if (GET_CODE (base) == SYMBOL_REF
8869 && !offset_within_block_p (base, INTVAL (offset)))
8870 return true;
8871 }
8872 return arm_tls_referenced_p (x);
8873 }
8874 \f
8875 #define REG_OR_SUBREG_REG(X) \
8876 (REG_P (X) \
8877 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8878
8879 #define REG_OR_SUBREG_RTX(X) \
8880 (REG_P (X) ? (X) : SUBREG_REG (X))
8881
8882 static inline int
8883 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8884 {
8885 machine_mode mode = GET_MODE (x);
8886 int total, words;
8887
8888 switch (code)
8889 {
8890 case ASHIFT:
8891 case ASHIFTRT:
8892 case LSHIFTRT:
8893 case ROTATERT:
8894 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8895
8896 case PLUS:
8897 case MINUS:
8898 case COMPARE:
8899 case NEG:
8900 case NOT:
8901 return COSTS_N_INSNS (1);
8902
8903 case MULT:
8904 if (arm_arch6m && arm_m_profile_small_mul)
8905 return COSTS_N_INSNS (32);
8906
8907 if (CONST_INT_P (XEXP (x, 1)))
8908 {
8909 int cycles = 0;
8910 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8911
8912 while (i)
8913 {
8914 i >>= 2;
8915 cycles++;
8916 }
8917 return COSTS_N_INSNS (2) + cycles;
8918 }
8919 return COSTS_N_INSNS (1) + 16;
8920
8921 case SET:
8922 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8923 the mode. */
8924 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8925 return (COSTS_N_INSNS (words)
8926 + 4 * ((MEM_P (SET_SRC (x)))
8927 + MEM_P (SET_DEST (x))));
8928
8929 case CONST_INT:
8930 if (outer == SET)
8931 {
8932 if (UINTVAL (x) < 256
8933 /* 16-bit constant. */
8934 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8935 return 0;
8936 if (thumb_shiftable_const (INTVAL (x)))
8937 return COSTS_N_INSNS (2);
8938 return COSTS_N_INSNS (3);
8939 }
8940 else if ((outer == PLUS || outer == COMPARE)
8941 && INTVAL (x) < 256 && INTVAL (x) > -256)
8942 return 0;
8943 else if ((outer == IOR || outer == XOR || outer == AND)
8944 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8945 return COSTS_N_INSNS (1);
8946 else if (outer == AND)
8947 {
8948 int i;
8949 /* This duplicates the tests in the andsi3 expander. */
8950 for (i = 9; i <= 31; i++)
8951 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8952 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8953 return COSTS_N_INSNS (2);
8954 }
8955 else if (outer == ASHIFT || outer == ASHIFTRT
8956 || outer == LSHIFTRT)
8957 return 0;
8958 return COSTS_N_INSNS (2);
8959
8960 case CONST:
8961 case CONST_DOUBLE:
8962 case LABEL_REF:
8963 case SYMBOL_REF:
8964 return COSTS_N_INSNS (3);
8965
8966 case UDIV:
8967 case UMOD:
8968 case DIV:
8969 case MOD:
8970 return 100;
8971
8972 case TRUNCATE:
8973 return 99;
8974
8975 case AND:
8976 case XOR:
8977 case IOR:
8978 /* XXX guess. */
8979 return 8;
8980
8981 case MEM:
8982 /* XXX another guess. */
8983 /* Memory costs quite a lot for the first word, but subsequent words
8984 load at the equivalent of a single insn each. */
8985 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8986 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8987 ? 4 : 0));
8988
8989 case IF_THEN_ELSE:
8990 /* XXX a guess. */
8991 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8992 return 14;
8993 return 2;
8994
8995 case SIGN_EXTEND:
8996 case ZERO_EXTEND:
8997 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8998 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8999
9000 if (mode == SImode)
9001 return total;
9002
9003 if (arm_arch6)
9004 return total + COSTS_N_INSNS (1);
9005
9006 /* Assume a two-shift sequence. Increase the cost slightly so
9007 we prefer actual shifts over an extend operation. */
9008 return total + 1 + COSTS_N_INSNS (2);
9009
9010 default:
9011 return 99;
9012 }
9013 }
9014
9015 /* Estimates the size cost of thumb1 instructions.
9016 For now most of the code is copied from thumb1_rtx_costs. We need more
9017 fine-grained tuning when we have more related test cases. */
9018 static inline int
9019 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9020 {
9021 machine_mode mode = GET_MODE (x);
9022 int words, cost;
9023
9024 switch (code)
9025 {
9026 case ASHIFT:
9027 case ASHIFTRT:
9028 case LSHIFTRT:
9029 case ROTATERT:
9030 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9031
9032 case PLUS:
9033 case MINUS:
9034 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9035 patterns generated by RTL expansion, especially for the expansion of
9036 multiplication. */
9037 if ((GET_CODE (XEXP (x, 0)) == MULT
9038 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9039 || (GET_CODE (XEXP (x, 1)) == MULT
9040 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9041 return COSTS_N_INSNS (2);
9042 /* Fall through. */
9043 case COMPARE:
9044 case NEG:
9045 case NOT:
9046 return COSTS_N_INSNS (1);
9047
9048 case MULT:
9049 if (CONST_INT_P (XEXP (x, 1)))
9050 {
9051 /* The Thumb-1 mul instruction can't operate on a constant; we must
9052 load it into a register first. */
9053 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9054 /* For targets that have a very small, high-latency multiply
9055 unit, we prefer to synthesize the mult with up to 5 instructions,
9056 giving a good balance between size and performance. */
9057 if (arm_arch6m && arm_m_profile_small_mul)
9058 return COSTS_N_INSNS (5);
9059 else
9060 return COSTS_N_INSNS (1) + const_size;
9061 }
9062 return COSTS_N_INSNS (1);
9063
9064 case SET:
9065 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9066 the mode. */
9067 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9068 cost = COSTS_N_INSNS (words);
9069 if (satisfies_constraint_J (SET_SRC (x))
9070 || satisfies_constraint_K (SET_SRC (x))
9071 /* Too big an immediate for a 2-byte mov, using MOVT. */
9072 || (CONST_INT_P (SET_SRC (x))
9073 && UINTVAL (SET_SRC (x)) >= 256
9074 && TARGET_HAVE_MOVT
9075 && satisfies_constraint_j (SET_SRC (x)))
9076 /* thumb1_movdi_insn. */
9077 || ((words > 1) && MEM_P (SET_SRC (x))))
9078 cost += COSTS_N_INSNS (1);
9079 return cost;
9080
9081 case CONST_INT:
9082 if (outer == SET)
9083 {
9084 if (UINTVAL (x) < 256)
9085 return COSTS_N_INSNS (1);
9086 /* movw is 4 bytes long. */
9087 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9088 return COSTS_N_INSNS (2);
9089 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9090 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9091 return COSTS_N_INSNS (2);
9092 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9093 if (thumb_shiftable_const (INTVAL (x)))
9094 return COSTS_N_INSNS (2);
9095 return COSTS_N_INSNS (3);
9096 }
9097 else if ((outer == PLUS || outer == COMPARE)
9098 && INTVAL (x) < 256 && INTVAL (x) > -256)
9099 return 0;
9100 else if ((outer == IOR || outer == XOR || outer == AND)
9101 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9102 return COSTS_N_INSNS (1);
9103 else if (outer == AND)
9104 {
9105 int i;
9106 /* This duplicates the tests in the andsi3 expander. */
9107 for (i = 9; i <= 31; i++)
9108 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9109 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9110 return COSTS_N_INSNS (2);
9111 }
9112 else if (outer == ASHIFT || outer == ASHIFTRT
9113 || outer == LSHIFTRT)
9114 return 0;
9115 return COSTS_N_INSNS (2);
9116
9117 case CONST:
9118 case CONST_DOUBLE:
9119 case LABEL_REF:
9120 case SYMBOL_REF:
9121 return COSTS_N_INSNS (3);
9122
9123 case UDIV:
9124 case UMOD:
9125 case DIV:
9126 case MOD:
9127 return 100;
9128
9129 case TRUNCATE:
9130 return 99;
9131
9132 case AND:
9133 case XOR:
9134 case IOR:
9135 return COSTS_N_INSNS (1);
9136
9137 case MEM:
9138 return (COSTS_N_INSNS (1)
9139 + COSTS_N_INSNS (1)
9140 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9141 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9142 ? COSTS_N_INSNS (1) : 0));
9143
9144 case IF_THEN_ELSE:
9145 /* XXX a guess. */
9146 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9147 return 14;
9148 return 2;
9149
9150 case ZERO_EXTEND:
9151 /* XXX still guessing. */
9152 switch (GET_MODE (XEXP (x, 0)))
9153 {
9154 case E_QImode:
9155 return (1 + (mode == DImode ? 4 : 0)
9156 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9157
9158 case E_HImode:
9159 return (4 + (mode == DImode ? 4 : 0)
9160 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9161
9162 case E_SImode:
9163 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9164
9165 default:
9166 return 99;
9167 }
9168
9169 default:
9170 return 99;
9171 }
9172 }
9173
9174 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9175 operand, then return the operand that is being shifted. If the shift
9176 is not by a constant, then set SHIFT_REG to point to the operand.
9177 Return NULL if OP is not a shifter operand. */
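/* For example, (mult X (const_int 4)) is a shift left by 2 and returns X,
while (ashift X Y) with Y a register returns X and sets *SHIFT_REG to Y.  */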
9178 static rtx
9179 shifter_op_p (rtx op, rtx *shift_reg)
9180 {
9181 enum rtx_code code = GET_CODE (op);
9182
9183 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9184 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9185 return XEXP (op, 0);
9186 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9187 return XEXP (op, 0);
9188 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9189 || code == ASHIFTRT)
9190 {
9191 if (!CONST_INT_P (XEXP (op, 1)))
9192 *shift_reg = XEXP (op, 1);
9193 return XEXP (op, 0);
9194 }
9195
9196 return NULL;
9197 }
9198
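/* Helper for arm_rtx_costs_internal.  Compute the cost of the UNSPEC or
UNSPEC_VOLATILE expression X and store it in *COST.  The return value is
true when the cost is final and the caller need not recurse into the
operands of X.  */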
9199 static bool
9200 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9201 {
9202 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9203 rtx_code code = GET_CODE (x);
9204 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9205
9206 switch (XINT (x, 1))
9207 {
9208 case UNSPEC_UNALIGNED_LOAD:
9209 /* We can only do unaligned loads into the integer unit, and we can't
9210 use LDM or LDRD. */
9211 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9212 if (speed_p)
9213 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9214 + extra_cost->ldst.load_unaligned);
9215
9216 #ifdef NOT_YET
9217 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9218 ADDR_SPACE_GENERIC, speed_p);
9219 #endif
9220 return true;
9221
9222 case UNSPEC_UNALIGNED_STORE:
9223 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9224 if (speed_p)
9225 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9226 + extra_cost->ldst.store_unaligned);
9227
9228 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9229 #ifdef NOT_YET
9230 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9231 ADDR_SPACE_GENERIC, speed_p);
9232 #endif
9233 return true;
9234
9235 case UNSPEC_VRINTZ:
9236 case UNSPEC_VRINTP:
9237 case UNSPEC_VRINTM:
9238 case UNSPEC_VRINTR:
9239 case UNSPEC_VRINTX:
9240 case UNSPEC_VRINTA:
9241 if (speed_p)
9242 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9243
9244 return true;
9245 default:
9246 *cost = COSTS_N_INSNS (2);
9247 break;
9248 }
9249 return true;
9250 }
9251
9252 /* Cost of a libcall. We assume one insn per argument, an amount for the
9253 call (one insn for -Os) and then one for processing the result. */
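/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing for
speed and COSTS_N_INSNS (4) when optimizing for size.  */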
9254 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9255
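/* Helper macro for narrow-mode PLUS and MINUS: if operand IDX of X is a
left shift (the only shift form usable in the narrow modes), add the
appropriate arith-shift cost, plus the shift-register cost when the
shift amount is in a register, plus the costs of the shifted operand
and of the other operand, and return true.  */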
9256 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9257 do \
9258 { \
9259 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9260 if (shift_op != NULL \
9261 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9262 { \
9263 if (shift_reg) \
9264 { \
9265 if (speed_p) \
9266 *cost += extra_cost->alu.arith_shift_reg; \
9267 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9268 ASHIFT, 1, speed_p); \
9269 } \
9270 else if (speed_p) \
9271 *cost += extra_cost->alu.arith_shift; \
9272 \
9273 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9274 ASHIFT, 0, speed_p) \
9275 + rtx_cost (XEXP (x, 1 - IDX), \
9276 GET_MODE (shift_op), \
9277 OP, 1, speed_p)); \
9278 return true; \
9279 } \
9280 } \
9281 while (0)
9282
9283 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9284 considering the costs of the addressing mode and memory access
9285 separately. */
9286 static bool
9287 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9288 int *cost, bool speed_p)
9289 {
9290 machine_mode mode = GET_MODE (x);
9291
9292 *cost = COSTS_N_INSNS (1);
9293
9294 if (flag_pic
9295 && GET_CODE (XEXP (x, 0)) == PLUS
9296 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9297 /* This will be split into two instructions. Add the cost of the
9298 additional instruction here. The cost of the memory access is computed
9299 below. See arm.md:calculate_pic_address. */
9300 *cost += COSTS_N_INSNS (1);
9301
9302 /* Calculate cost of the addressing mode. */
9303 if (speed_p)
9304 {
9305 arm_addr_mode_op op_type;
9306 switch (GET_CODE (XEXP (x, 0)))
9307 {
9308 default:
9309 case REG:
9310 op_type = AMO_DEFAULT;
9311 break;
9312 case MINUS:
9313 /* MINUS does not appear in RTL, but the architecture supports it,
9314 so handle this case defensively. */
9315 /* fall through */
9316 case PLUS:
9317 op_type = AMO_NO_WB;
9318 break;
9319 case PRE_INC:
9320 case PRE_DEC:
9321 case POST_INC:
9322 case POST_DEC:
9323 case PRE_MODIFY:
9324 case POST_MODIFY:
9325 op_type = AMO_WB;
9326 break;
9327 }
9328
9329 if (VECTOR_MODE_P (mode))
9330 *cost += current_tune->addr_mode_costs->vector[op_type];
9331 else if (FLOAT_MODE_P (mode))
9332 *cost += current_tune->addr_mode_costs->fp[op_type];
9333 else
9334 *cost += current_tune->addr_mode_costs->integer[op_type];
9335 }
9336
9337 /* Calculate cost of memory access. */
9338 if (speed_p)
9339 {
9340 if (FLOAT_MODE_P (mode))
9341 {
9342 if (GET_MODE_SIZE (mode) == 8)
9343 *cost += extra_cost->ldst.loadd;
9344 else
9345 *cost += extra_cost->ldst.loadf;
9346 }
9347 else if (VECTOR_MODE_P (mode))
9348 *cost += extra_cost->ldst.loadv;
9349 else
9350 {
9351 /* Integer modes */
9352 if (GET_MODE_SIZE (mode) == 8)
9353 *cost += extra_cost->ldst.ldrd;
9354 else
9355 *cost += extra_cost->ldst.load;
9356 }
9357 }
9358
9359 return true;
9360 }
9361
9362 /* RTX costs. Make an estimate of the cost of executing the operation
9363 X, which is contained within an operation with code OUTER_CODE.
9364 SPEED_P indicates whether the cost desired is the performance cost,
9365 or the size cost. The estimate is stored in COST and the return
9366 value is TRUE if the cost calculation is final, or FALSE if the
9367 caller should recurse through the operands of X to add additional
9368 costs.
9369
9370 We currently make no attempt to model the size savings of Thumb-2
9371 16-bit instructions. At the normal points in compilation where
9372 this code is called we have no measure of whether the condition
9373 flags are live or not, and thus no realistic way to determine what
9374 the size will eventually be. */
9375 static bool
9376 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9377 const struct cpu_cost_table *extra_cost,
9378 int *cost, bool speed_p)
9379 {
9380 machine_mode mode = GET_MODE (x);
9381
9382 *cost = COSTS_N_INSNS (1);
9383
9384 if (TARGET_THUMB1)
9385 {
9386 if (speed_p)
9387 *cost = thumb1_rtx_costs (x, code, outer_code);
9388 else
9389 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9390 return true;
9391 }
9392
9393 switch (code)
9394 {
9395 case SET:
9396 *cost = 0;
9397 /* SET RTXs don't have a mode so we get it from the destination. */
9398 mode = GET_MODE (SET_DEST (x));
9399
9400 if (REG_P (SET_SRC (x))
9401 && REG_P (SET_DEST (x)))
9402 {
9403 /* Assume that most copies can be done with a single insn,
9404 unless we don't have HW FP, in which case everything
9405 larger than word mode will require two insns. */
9406 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9407 && GET_MODE_SIZE (mode) > 4)
9408 || mode == DImode)
9409 ? 2 : 1);
9410 /* Conditional register moves can be encoded
9411 in 16 bits in Thumb mode. */
9412 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9413 *cost >>= 1;
9414
9415 return true;
9416 }
9417
9418 if (CONST_INT_P (SET_SRC (x)))
9419 {
9420 /* Handle CONST_INT here, since the value doesn't have a mode
9421 and we would otherwise be unable to work out the true cost. */
9422 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9423 0, speed_p);
9424 outer_code = SET;
9425 /* Slightly lower the cost of setting a core reg to a constant.
9426 This helps break up chains and allows for better scheduling. */
9427 if (REG_P (SET_DEST (x))
9428 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9429 *cost -= 1;
9430 x = SET_SRC (x);
9431 /* Moves of an immediate in the range [0, 255] can be
9432 encoded in 16 bits in Thumb mode. */
9433 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9434 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9435 *cost >>= 1;
9436 goto const_int_cost;
9437 }
9438
9439 return false;
9440
9441 case MEM:
9442 return arm_mem_costs (x, extra_cost, cost, speed_p);
9443
9444 case PARALLEL:
9445 {
9446 /* Calculations of LDM costs are complex. We assume an initial cost
9447 (ldm_1st) which will load the number of registers mentioned in
9448 ldm_regs_per_insn_1st registers; then each additional
9449 ldm_regs_per_insn_subsequent registers cost one more insn. The
9450 formula for N regs is thus:
9451
9452 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9453 + ldm_regs_per_insn_subsequent - 1)
9454 / ldm_regs_per_insn_subsequent).
9455
9456 Additional costs may also be added for addressing. A similar
9457 formula is used for STM. */
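/* For example, if ldm_regs_per_insn_1st and ldm_regs_per_insn_subsequent
were both 2, a 5-register LDM would cost
ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2) = ldm_1st + COSTS_N_INSNS (2).  */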
9458
9459 bool is_ldm = load_multiple_operation (x, SImode);
9460 bool is_stm = store_multiple_operation (x, SImode);
9461
9462 if (is_ldm || is_stm)
9463 {
9464 if (speed_p)
9465 {
9466 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9467 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9468 ? extra_cost->ldst.ldm_regs_per_insn_1st
9469 : extra_cost->ldst.stm_regs_per_insn_1st;
9470 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9471 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9472 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9473
9474 *cost += regs_per_insn_1st
9475 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9476 + regs_per_insn_sub - 1)
9477 / regs_per_insn_sub);
9478 return true;
9479 }
9480
9481 }
9482 return false;
9483 }
9484 case DIV:
9485 case UDIV:
9486 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9487 && (mode == SFmode || !TARGET_VFP_SINGLE))
9488 *cost += COSTS_N_INSNS (speed_p
9489 ? extra_cost->fp[mode != SFmode].div : 0);
9490 else if (mode == SImode && TARGET_IDIV)
9491 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9492 else
9493 *cost = LIBCALL_COST (2);
9494
9495 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9496 are possible, udiv is preferred. */
9497 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9498 return false; /* All arguments must be in registers. */
9499
9500 case MOD:
9501 /* MOD by a power of 2 can be expanded as:
9502 rsbs r1, r0, #0
9503 and r0, r0, #(n - 1)
9504 and r1, r1, #(n - 1)
9505 rsbpl r0, r1, #0. */
9506 if (CONST_INT_P (XEXP (x, 1))
9507 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9508 && mode == SImode)
9509 {
9510 *cost += COSTS_N_INSNS (3);
9511
9512 if (speed_p)
9513 *cost += 2 * extra_cost->alu.logical
9514 + extra_cost->alu.arith;
9515 return true;
9516 }
9517
9518 /* Fall-through. */
9519 case UMOD:
9520 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9521 are possible, udiv is preferred. */
9522 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9523 return false; /* All arguments must be in registers. */
9524
9525 case ROTATE:
9526 if (mode == SImode && REG_P (XEXP (x, 1)))
9527 {
9528 *cost += (COSTS_N_INSNS (1)
9529 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9530 if (speed_p)
9531 *cost += extra_cost->alu.shift_reg;
9532 return true;
9533 }
9534 /* Fall through */
9535 case ROTATERT:
9536 case ASHIFT:
9537 case LSHIFTRT:
9538 case ASHIFTRT:
9539 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9540 {
9541 *cost += (COSTS_N_INSNS (2)
9542 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9543 if (speed_p)
9544 *cost += 2 * extra_cost->alu.shift;
9545 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9546 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9547 *cost += 1;
9548 return true;
9549 }
9550 else if (mode == SImode)
9551 {
9552 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x, 1)))
9555 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9556 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9557 return true;
9558 }
9559 else if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9561 {
9562 if (code == ASHIFT)
9563 {
9564 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9565 /* Slightly disparage register shifts at -Os, but not by
9566 much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9569 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9570 }
9571 else if (code == LSHIFTRT || code == ASHIFTRT)
9572 {
9573 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9574 {
9575 /* Can use SBFX/UBFX. */
9576 if (speed_p)
9577 *cost += extra_cost->alu.bfx;
9578 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9579 }
9580 else
9581 {
9582 *cost += COSTS_N_INSNS (1);
9583 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9584 if (speed_p)
9585 {
9586 if (CONST_INT_P (XEXP (x, 1)))
9587 *cost += 2 * extra_cost->alu.shift;
9588 else
9589 *cost += (extra_cost->alu.shift
9590 + extra_cost->alu.shift_reg);
9591 }
9592 else
9593 /* Slightly disparage register shifts. */
9594 *cost += !CONST_INT_P (XEXP (x, 1));
9595 }
9596 }
9597 else /* Rotates. */
9598 {
9599 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9600 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9601 if (speed_p)
9602 {
9603 if (CONST_INT_P (XEXP (x, 1)))
9604 *cost += (2 * extra_cost->alu.shift
9605 + extra_cost->alu.log_shift);
9606 else
9607 *cost += (extra_cost->alu.shift
9608 + extra_cost->alu.shift_reg
9609 + extra_cost->alu.log_shift_reg);
9610 }
9611 }
9612 return true;
9613 }
9614
9615 *cost = LIBCALL_COST (2);
9616 return false;
9617
9618 case BSWAP:
9619 if (arm_arch6)
9620 {
9621 if (mode == SImode)
9622 {
9623 if (speed_p)
9624 *cost += extra_cost->alu.rev;
9625
9626 return false;
9627 }
9628 }
9629 else
9630 {
9631 /* No rev instruction available. Look at arm_legacy_rev
9632 and thumb_legacy_rev for the form of RTL used then. */
9633 if (TARGET_THUMB)
9634 {
9635 *cost += COSTS_N_INSNS (9);
9636
9637 if (speed_p)
9638 {
9639 *cost += 6 * extra_cost->alu.shift;
9640 *cost += 3 * extra_cost->alu.logical;
9641 }
9642 }
9643 else
9644 {
9645 *cost += COSTS_N_INSNS (4);
9646
9647 if (speed_p)
9648 {
9649 *cost += 2 * extra_cost->alu.shift;
9650 *cost += extra_cost->alu.arith_shift;
9651 *cost += 2 * extra_cost->alu.logical;
9652 }
9653 }
9654 return true;
9655 }
9656 return false;
9657
9658 case MINUS:
9659 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9660 && (mode == SFmode || !TARGET_VFP_SINGLE))
9661 {
9662 if (GET_CODE (XEXP (x, 0)) == MULT
9663 || GET_CODE (XEXP (x, 1)) == MULT)
9664 {
9665 rtx mul_op0, mul_op1, sub_op;
9666
9667 if (speed_p)
9668 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9669
9670 if (GET_CODE (XEXP (x, 0)) == MULT)
9671 {
9672 mul_op0 = XEXP (XEXP (x, 0), 0);
9673 mul_op1 = XEXP (XEXP (x, 0), 1);
9674 sub_op = XEXP (x, 1);
9675 }
9676 else
9677 {
9678 mul_op0 = XEXP (XEXP (x, 1), 0);
9679 mul_op1 = XEXP (XEXP (x, 1), 1);
9680 sub_op = XEXP (x, 0);
9681 }
9682
9683 /* The first operand of the multiply may be optionally
9684 negated. */
9685 if (GET_CODE (mul_op0) == NEG)
9686 mul_op0 = XEXP (mul_op0, 0);
9687
9688 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9689 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9690 + rtx_cost (sub_op, mode, code, 0, speed_p));
9691
9692 return true;
9693 }
9694
9695 if (speed_p)
9696 *cost += extra_cost->fp[mode != SFmode].addsub;
9697 return false;
9698 }
9699
9700 if (mode == SImode)
9701 {
9702 rtx shift_by_reg = NULL;
9703 rtx shift_op;
9704 rtx non_shift_op;
9705
9706 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9707 if (shift_op == NULL)
9708 {
9709 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9710 non_shift_op = XEXP (x, 0);
9711 }
9712 else
9713 non_shift_op = XEXP (x, 1);
9714
9715 if (shift_op != NULL)
9716 {
9717 if (shift_by_reg != NULL)
9718 {
9719 if (speed_p)
9720 *cost += extra_cost->alu.arith_shift_reg;
9721 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9722 }
9723 else if (speed_p)
9724 *cost += extra_cost->alu.arith_shift;
9725
9726 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9727 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9728 return true;
9729 }
9730
9731 if (arm_arch_thumb2
9732 && GET_CODE (XEXP (x, 1)) == MULT)
9733 {
9734 /* MLS. */
9735 if (speed_p)
9736 *cost += extra_cost->mult[0].add;
9737 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9738 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9739 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9740 return true;
9741 }
9742
9743 if (CONST_INT_P (XEXP (x, 0)))
9744 {
9745 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9746 INTVAL (XEXP (x, 0)), NULL_RTX,
9747 NULL_RTX, 1, 0);
9748 *cost = COSTS_N_INSNS (insns);
9749 if (speed_p)
9750 *cost += insns * extra_cost->alu.arith;
9751 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9752 return true;
9753 }
9754 else if (speed_p)
9755 *cost += extra_cost->alu.arith;
9756
9757 return false;
9758 }
9759
9760 if (GET_MODE_CLASS (mode) == MODE_INT
9761 && GET_MODE_SIZE (mode) < 4)
9762 {
9763 rtx shift_op, shift_reg;
9764 shift_reg = NULL;
9765
9766 /* We check both sides of the MINUS for shifter operands since,
9767 unlike PLUS, it's not commutative. */
9768
9769 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9770 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9771
9772 /* Slightly disparage, as we might need to widen the result. */
9773 *cost += 1;
9774 if (speed_p)
9775 *cost += extra_cost->alu.arith;
9776
9777 if (CONST_INT_P (XEXP (x, 0)))
9778 {
9779 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9780 return true;
9781 }
9782
9783 return false;
9784 }
9785
9786 if (mode == DImode)
9787 {
9788 *cost += COSTS_N_INSNS (1);
9789
9790 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9791 {
9792 rtx op1 = XEXP (x, 1);
9793
9794 if (speed_p)
9795 *cost += 2 * extra_cost->alu.arith;
9796
9797 if (GET_CODE (op1) == ZERO_EXTEND)
9798 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9799 0, speed_p);
9800 else
9801 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9802 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9803 0, speed_p);
9804 return true;
9805 }
9806 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9807 {
9808 if (speed_p)
9809 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9810 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9811 0, speed_p)
9812 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9813 return true;
9814 }
9815 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9816 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9817 {
9818 if (speed_p)
9819 *cost += (extra_cost->alu.arith
9820 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9821 ? extra_cost->alu.arith
9822 : extra_cost->alu.arith_shift));
9823 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9824 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9825 GET_CODE (XEXP (x, 1)), 0, speed_p));
9826 return true;
9827 }
9828
9829 if (speed_p)
9830 *cost += 2 * extra_cost->alu.arith;
9831 return false;
9832 }
9833
9834 /* Vector mode? */
9835
9836 *cost = LIBCALL_COST (2);
9837 return false;
9838
9839 case PLUS:
9840 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9841 && (mode == SFmode || !TARGET_VFP_SINGLE))
9842 {
9843 if (GET_CODE (XEXP (x, 0)) == MULT)
9844 {
9845 rtx mul_op0, mul_op1, add_op;
9846
9847 if (speed_p)
9848 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9849
9850 mul_op0 = XEXP (XEXP (x, 0), 0);
9851 mul_op1 = XEXP (XEXP (x, 0), 1);
9852 add_op = XEXP (x, 1);
9853
9854 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9855 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9856 + rtx_cost (add_op, mode, code, 0, speed_p));
9857
9858 return true;
9859 }
9860
9861 if (speed_p)
9862 *cost += extra_cost->fp[mode != SFmode].addsub;
9863 return false;
9864 }
9865 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9866 {
9867 *cost = LIBCALL_COST (2);
9868 return false;
9869 }
9870
9871 /* Narrow modes can be synthesized in SImode, but the range
9872 of useful sub-operations is limited. Check for shift operations
9873 on one of the operands. Only left shifts can be used in the
9874 narrow modes. */
9875 if (GET_MODE_CLASS (mode) == MODE_INT
9876 && GET_MODE_SIZE (mode) < 4)
9877 {
9878 rtx shift_op, shift_reg;
9879 shift_reg = NULL;
9880
9881 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9882
9883 if (CONST_INT_P (XEXP (x, 1)))
9884 {
9885 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9886 INTVAL (XEXP (x, 1)), NULL_RTX,
9887 NULL_RTX, 1, 0);
9888 *cost = COSTS_N_INSNS (insns);
9889 if (speed_p)
9890 *cost += insns * extra_cost->alu.arith;
9891 /* Slightly penalize a narrow operation as the result may
9892 need widening. */
9893 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9894 return true;
9895 }
9896
9897 /* Slightly penalize a narrow operation as the result may
9898 need widening. */
9899 *cost += 1;
9900 if (speed_p)
9901 *cost += extra_cost->alu.arith;
9902
9903 return false;
9904 }
9905
9906 if (mode == SImode)
9907 {
9908 rtx shift_op, shift_reg;
9909
9910 if (TARGET_INT_SIMD
9911 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9912 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9913 {
9914 /* UXTA[BH] or SXTA[BH]. */
9915 if (speed_p)
9916 *cost += extra_cost->alu.extend_arith;
9917 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9918 0, speed_p)
9919 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9920 return true;
9921 }
9922
9923 shift_reg = NULL;
9924 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9925 if (shift_op != NULL)
9926 {
9927 if (shift_reg)
9928 {
9929 if (speed_p)
9930 *cost += extra_cost->alu.arith_shift_reg;
9931 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9932 }
9933 else if (speed_p)
9934 *cost += extra_cost->alu.arith_shift;
9935
9936 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9937 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9938 return true;
9939 }
9940 if (GET_CODE (XEXP (x, 0)) == MULT)
9941 {
9942 rtx mul_op = XEXP (x, 0);
9943
9944 if (TARGET_DSP_MULTIPLY
9945 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9946 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9947 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9948 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9949 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9950 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9951 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9952 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9953 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9954 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9955 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9956 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9957 == 16))))))
9958 {
9959 /* SMLA[BT][BT]. */
9960 if (speed_p)
9961 *cost += extra_cost->mult[0].extend_add;
9962 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9963 SIGN_EXTEND, 0, speed_p)
9964 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9965 SIGN_EXTEND, 0, speed_p)
9966 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9967 return true;
9968 }
9969
9970 if (speed_p)
9971 *cost += extra_cost->mult[0].add;
9972 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9973 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9974 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9975 return true;
9976 }
9977 if (CONST_INT_P (XEXP (x, 1)))
9978 {
9979 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9980 INTVAL (XEXP (x, 1)), NULL_RTX,
9981 NULL_RTX, 1, 0);
9982 *cost = COSTS_N_INSNS (insns);
9983 if (speed_p)
9984 *cost += insns * extra_cost->alu.arith;
9985 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9986 return true;
9987 }
9988 else if (speed_p)
9989 *cost += extra_cost->alu.arith;
9990
9991 return false;
9992 }
9993
9994 if (mode == DImode)
9995 {
9996 if (GET_CODE (XEXP (x, 0)) == MULT
9997 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9998 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9999 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10000 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10001 {
10002 if (speed_p)
10003 *cost += extra_cost->mult[1].extend_add;
10004 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10005 ZERO_EXTEND, 0, speed_p)
10006 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10007 ZERO_EXTEND, 0, speed_p)
10008 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10009 return true;
10010 }
10011
10012 *cost += COSTS_N_INSNS (1);
10013
10014 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10015 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10016 {
10017 if (speed_p)
10018 *cost += (extra_cost->alu.arith
10019 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10020 ? extra_cost->alu.arith
10021 : extra_cost->alu.arith_shift));
10022
10023 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10024 0, speed_p)
10025 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10026 return true;
10027 }
10028
10029 if (speed_p)
10030 *cost += 2 * extra_cost->alu.arith;
10031 return false;
10032 }
10033
10034 /* Vector mode? */
10035 *cost = LIBCALL_COST (2);
10036 return false;
10037 case IOR:
10038 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10039 {
10040 if (speed_p)
10041 *cost += extra_cost->alu.rev;
10042
10043 return true;
10044 }
10045 /* Fall through. */
10046 case AND: case XOR:
10047 if (mode == SImode)
10048 {
10049 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10050 rtx op0 = XEXP (x, 0);
10051 rtx shift_op, shift_reg;
10052
10053 if (subcode == NOT
10054 && (code == AND
10055 || (code == IOR && TARGET_THUMB2)))
10056 op0 = XEXP (op0, 0);
10057
10058 shift_reg = NULL;
10059 shift_op = shifter_op_p (op0, &shift_reg);
10060 if (shift_op != NULL)
10061 {
10062 if (shift_reg)
10063 {
10064 if (speed_p)
10065 *cost += extra_cost->alu.log_shift_reg;
10066 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10067 }
10068 else if (speed_p)
10069 *cost += extra_cost->alu.log_shift;
10070
10071 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10072 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10073 return true;
10074 }
10075
10076 if (CONST_INT_P (XEXP (x, 1)))
10077 {
10078 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10079 INTVAL (XEXP (x, 1)), NULL_RTX,
10080 NULL_RTX, 1, 0);
10081
10082 *cost = COSTS_N_INSNS (insns);
10083 if (speed_p)
10084 *cost += insns * extra_cost->alu.logical;
10085 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10086 return true;
10087 }
10088
10089 if (speed_p)
10090 *cost += extra_cost->alu.logical;
10091 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10092 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10093 return true;
10094 }
10095
10096 if (mode == DImode)
10097 {
10098 rtx op0 = XEXP (x, 0);
10099 enum rtx_code subcode = GET_CODE (op0);
10100
10101 *cost += COSTS_N_INSNS (1);
10102
10103 if (subcode == NOT
10104 && (code == AND
10105 || (code == IOR && TARGET_THUMB2)))
10106 op0 = XEXP (op0, 0);
10107
10108 if (GET_CODE (op0) == ZERO_EXTEND)
10109 {
10110 if (speed_p)
10111 *cost += 2 * extra_cost->alu.logical;
10112
10113 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10114 0, speed_p)
10115 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10116 return true;
10117 }
10118 else if (GET_CODE (op0) == SIGN_EXTEND)
10119 {
10120 if (speed_p)
10121 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10122
10123 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10124 0, speed_p)
10125 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10126 return true;
10127 }
10128
10129 if (speed_p)
10130 *cost += 2 * extra_cost->alu.logical;
10131
10132 return true;
10133 }
10134 /* Vector mode? */
10135
10136 *cost = LIBCALL_COST (2);
10137 return false;
10138
10139 case MULT:
10140 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10141 && (mode == SFmode || !TARGET_VFP_SINGLE))
10142 {
10143 rtx op0 = XEXP (x, 0);
10144
10145 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10146 op0 = XEXP (op0, 0);
10147
10148 if (speed_p)
10149 *cost += extra_cost->fp[mode != SFmode].mult;
10150
10151 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10152 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10153 return true;
10154 }
10155 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10156 {
10157 *cost = LIBCALL_COST (2);
10158 return false;
10159 }
10160
10161 if (mode == SImode)
10162 {
10163 if (TARGET_DSP_MULTIPLY
10164 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10165 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10166 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10167 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10168 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10169 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10170 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10171 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10172 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10173 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10174 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10175 && (INTVAL (XEXP (XEXP (x, 1), 1))
10176 == 16))))))
10177 {
10178 /* SMUL[TB][TB]. */
10179 if (speed_p)
10180 *cost += extra_cost->mult[0].extend;
10181 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10182 SIGN_EXTEND, 0, speed_p);
10183 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10184 SIGN_EXTEND, 1, speed_p);
10185 return true;
10186 }
10187 if (speed_p)
10188 *cost += extra_cost->mult[0].simple;
10189 return false;
10190 }
10191
10192 if (mode == DImode)
10193 {
10194 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10195 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10196 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10197 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10198 {
10199 if (speed_p)
10200 *cost += extra_cost->mult[1].extend;
10201 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10202 ZERO_EXTEND, 0, speed_p)
10203 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10204 ZERO_EXTEND, 0, speed_p));
10205 return true;
10206 }
10207
10208 *cost = LIBCALL_COST (2);
10209 return false;
10210 }
10211
10212 /* Vector mode? */
10213 *cost = LIBCALL_COST (2);
10214 return false;
10215
10216 case NEG:
10217 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10218 && (mode == SFmode || !TARGET_VFP_SINGLE))
10219 {
10220 if (GET_CODE (XEXP (x, 0)) == MULT)
10221 {
10222 /* VNMUL. */
10223 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10224 return true;
10225 }
10226
10227 if (speed_p)
10228 *cost += extra_cost->fp[mode != SFmode].neg;
10229
10230 return false;
10231 }
10232 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10233 {
10234 *cost = LIBCALL_COST (1);
10235 return false;
10236 }
10237
10238 if (mode == SImode)
10239 {
10240 if (GET_CODE (XEXP (x, 0)) == ABS)
10241 {
10242 *cost += COSTS_N_INSNS (1);
10243 /* Assume the non-flag-changing variant. */
10244 if (speed_p)
10245 *cost += (extra_cost->alu.log_shift
10246 + extra_cost->alu.arith_shift);
10247 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10248 return true;
10249 }
10250
10251 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10252 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10253 {
10254 *cost += COSTS_N_INSNS (1);
10255 /* No extra cost for MOV imm and MVN imm. */
10256 /* If the comparison op is using the flags, there's no further
10257 cost, otherwise we need to add the cost of the comparison. */
10258 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10259 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10260 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10261 {
10262 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10263 *cost += (COSTS_N_INSNS (1)
10264 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10265 0, speed_p)
10266 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10267 1, speed_p));
10268 if (speed_p)
10269 *cost += extra_cost->alu.arith;
10270 }
10271 return true;
10272 }
10273
10274 if (speed_p)
10275 *cost += extra_cost->alu.arith;
10276 return false;
10277 }
10278
10279 if (GET_MODE_CLASS (mode) == MODE_INT
10280 && GET_MODE_SIZE (mode) < 4)
10281 {
10282 /* Slightly disparage, as we might need an extend operation. */
10283 *cost += 1;
10284 if (speed_p)
10285 *cost += extra_cost->alu.arith;
10286 return false;
10287 }
10288
10289 if (mode == DImode)
10290 {
10291 *cost += COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += 2 * extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 /* Vector mode? */
10298 *cost = LIBCALL_COST (1);
10299 return false;
10300
10301 case NOT:
10302 if (mode == SImode)
10303 {
10304 rtx shift_op;
10305 rtx shift_reg = NULL;
10306
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10308
10309 if (shift_op)
10310 {
10311 if (shift_reg != NULL)
10312 {
10313 if (speed_p)
10314 *cost += extra_cost->alu.log_shift_reg;
10315 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10316 }
10317 else if (speed_p)
10318 *cost += extra_cost->alu.log_shift;
10319 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10320 return true;
10321 }
10322
10323 if (speed_p)
10324 *cost += extra_cost->alu.logical;
10325 return false;
10326 }
10327 if (mode == DImode)
10328 {
10329 *cost += COSTS_N_INSNS (1);
10330 return false;
10331 }
10332
10333 /* Vector mode? */
10334
10335 *cost += LIBCALL_COST (1);
10336 return false;
10337
10338 case IF_THEN_ELSE:
10339 {
10340 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10341 {
10342 *cost += COSTS_N_INSNS (3);
10343 return true;
10344 }
10345 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10346 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10347
10348 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10349 /* Assume that if one arm of the if_then_else is a register,
10350 that it will be tied with the result and eliminate the
10351 conditional insn. */
10352 if (REG_P (XEXP (x, 1)))
10353 *cost += op2cost;
10354 else if (REG_P (XEXP (x, 2)))
10355 *cost += op1cost;
10356 else
10357 {
10358 if (speed_p)
10359 {
10360 if (extra_cost->alu.non_exec_costs_exec)
10361 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10362 else
10363 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10364 }
10365 else
10366 *cost += op1cost + op2cost;
10367 }
10368 }
10369 return true;
10370
10371 case COMPARE:
10372 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10373 *cost = 0;
10374 else
10375 {
10376 machine_mode op0mode;
10377 /* We'll mostly assume that the cost of a compare is the cost of the
10378 LHS. However, there are some notable exceptions. */
10379
10380 /* Floating point compares are never done as side-effects. */
10381 op0mode = GET_MODE (XEXP (x, 0));
10382 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10383 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10384 {
10385 if (speed_p)
10386 *cost += extra_cost->fp[op0mode != SFmode].compare;
10387
10388 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10389 {
10390 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10391 return true;
10392 }
10393
10394 return false;
10395 }
10396 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10397 {
10398 *cost = LIBCALL_COST (2);
10399 return false;
10400 }
10401
10402 /* DImode compares normally take two insns. */
10403 if (op0mode == DImode)
10404 {
10405 *cost += COSTS_N_INSNS (1);
10406 if (speed_p)
10407 *cost += 2 * extra_cost->alu.arith;
10408 return false;
10409 }
10410
10411 if (op0mode == SImode)
10412 {
10413 rtx shift_op;
10414 rtx shift_reg;
10415
10416 if (XEXP (x, 1) == const0_rtx
10417 && !(REG_P (XEXP (x, 0))
10418 || (GET_CODE (XEXP (x, 0)) == SUBREG
10419 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10420 {
10421 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10422
10423 /* Multiply operations that set the flags are often
10424 significantly more expensive. */
10425 if (speed_p
10426 && GET_CODE (XEXP (x, 0)) == MULT
10427 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10428 *cost += extra_cost->mult[0].flag_setting;
10429
10430 if (speed_p
10431 && GET_CODE (XEXP (x, 0)) == PLUS
10432 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10433 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10434 0), 1), mode))
10435 *cost += extra_cost->mult[0].flag_setting;
10436 return true;
10437 }
10438
10439 shift_reg = NULL;
10440 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10441 if (shift_op != NULL)
10442 {
10443 if (shift_reg != NULL)
10444 {
10445 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10446 1, speed_p);
10447 if (speed_p)
10448 *cost += extra_cost->alu.arith_shift_reg;
10449 }
10450 else if (speed_p)
10451 *cost += extra_cost->alu.arith_shift;
10452 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10453 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10454 return true;
10455 }
10456
10457 if (speed_p)
10458 *cost += extra_cost->alu.arith;
10459 if (CONST_INT_P (XEXP (x, 1))
10460 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10461 {
10462 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10463 return true;
10464 }
10465 return false;
10466 }
10467
10468 /* Vector mode? */
10469
10470 *cost = LIBCALL_COST (2);
10471 return false;
10472 }
10473 return true;
10474
10475 case EQ:
10476 case NE:
10477 case LT:
10478 case LE:
10479 case GT:
10480 case GE:
10481 case LTU:
10482 case LEU:
10483 case GEU:
10484 case GTU:
10485 case ORDERED:
10486 case UNORDERED:
10487 case UNEQ:
10488 case UNLE:
10489 case UNLT:
10490 case UNGE:
10491 case UNGT:
10492 case LTGT:
10493 if (outer_code == SET)
10494 {
10495 /* Is it a store-flag operation? */
10496 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10497 && XEXP (x, 1) == const0_rtx)
10498 {
10499 /* Thumb also needs an IT insn. */
10500 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10501 return true;
10502 }
10503 if (XEXP (x, 1) == const0_rtx)
10504 {
10505 switch (code)
10506 {
10507 case LT:
10508 /* LSR Rd, Rn, #31. */
10509 if (speed_p)
10510 *cost += extra_cost->alu.shift;
10511 break;
10512
10513 case EQ:
10514 /* RSBS T1, Rn, #0
10515 ADC Rd, Rn, T1. */
10516
10517 case NE:
10518 /* SUBS T1, Rn, #1
10519 SBC Rd, Rn, T1. */
10520 *cost += COSTS_N_INSNS (1);
10521 break;
10522
10523 case LE:
10524 /* RSBS T1, Rn, Rn, LSR #31
10525 ADC Rd, Rn, T1. */
10526 *cost += COSTS_N_INSNS (1);
10527 if (speed_p)
10528 *cost += extra_cost->alu.arith_shift;
10529 break;
10530
10531 case GT:
10532 /* RSB Rd, Rn, Rn, ASR #1
10533 LSR Rd, Rd, #31. */
10534 *cost += COSTS_N_INSNS (1);
10535 if (speed_p)
10536 *cost += (extra_cost->alu.arith_shift
10537 + extra_cost->alu.shift);
10538 break;
10539
10540 case GE:
10541 /* ASR Rd, Rn, #31
10542 ADD Rd, Rn, #1. */
10543 *cost += COSTS_N_INSNS (1);
10544 if (speed_p)
10545 *cost += extra_cost->alu.shift;
10546 break;
10547
10548 default:
10549 /* Remaining cases are either meaningless or would take
10550 three insns anyway. */
10551 *cost = COSTS_N_INSNS (3);
10552 break;
10553 }
10554 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10555 return true;
10556 }
10557 else
10558 {
10559 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10560 if (CONST_INT_P (XEXP (x, 1))
10561 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10562 {
10563 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10564 return true;
10565 }
10566
10567 return false;
10568 }
10569 }
10570 /* Not directly inside a set. If it involves the condition code
10571 register, it must be the condition for a branch, cond_exec or
10572 if_then_else operation. Since the comparison is performed elsewhere,
10573 this is just the control part, which has no additional
10574 cost. */
10575 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10576 && XEXP (x, 1) == const0_rtx)
10577 {
10578 *cost = 0;
10579 return true;
10580 }
10581 return false;
10582
10583 case ABS:
10584 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10585 && (mode == SFmode || !TARGET_VFP_SINGLE))
10586 {
10587 if (speed_p)
10588 *cost += extra_cost->fp[mode != SFmode].neg;
10589
10590 return false;
10591 }
10592 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10593 {
10594 *cost = LIBCALL_COST (1);
10595 return false;
10596 }
10597
10598 if (mode == SImode)
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10602 return false;
10603 }
10604 /* Vector mode? */
10605 *cost = LIBCALL_COST (1);
10606 return false;
10607
10608 case SIGN_EXTEND:
10609 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10610 && MEM_P (XEXP (x, 0)))
10611 {
10612 if (mode == DImode)
10613 *cost += COSTS_N_INSNS (1);
10614
10615 if (!speed_p)
10616 return true;
10617
10618 if (GET_MODE (XEXP (x, 0)) == SImode)
10619 *cost += extra_cost->ldst.load;
10620 else
10621 *cost += extra_cost->ldst.load_sign_extend;
10622
10623 if (mode == DImode)
10624 *cost += extra_cost->alu.shift;
10625
10626 return true;
10627 }
10628
10629 /* Widening from less than 32-bits requires an extend operation. */
10630 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10631 {
10632 /* We have SXTB/SXTH. */
10633 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10634 if (speed_p)
10635 *cost += extra_cost->alu.extend;
10636 }
10637 else if (GET_MODE (XEXP (x, 0)) != SImode)
10638 {
10639 /* Needs two shifts. */
10640 *cost += COSTS_N_INSNS (1);
10641 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10642 if (speed_p)
10643 *cost += 2 * extra_cost->alu.shift;
10644 }
10645
10646 /* Widening beyond 32-bits requires one more insn. */
10647 if (mode == DImode)
10648 {
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += extra_cost->alu.shift;
10652 }
10653
10654 return true;
10655
10656 case ZERO_EXTEND:
10657 if ((arm_arch4
10658 || GET_MODE (XEXP (x, 0)) == SImode
10659 || GET_MODE (XEXP (x, 0)) == QImode)
10660 && MEM_P (XEXP (x, 0)))
10661 {
10662 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10663
10664 if (mode == DImode)
10665 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10666
10667 return true;
10668 }
10669
10670 /* Widening from less than 32-bits requires an extend operation. */
10671 if (GET_MODE (XEXP (x, 0)) == QImode)
10672 {
10673 /* UXTB can be a shorter instruction in Thumb2, but it might
10674 be slower than the AND Rd, Rn, #255 alternative. When
10675 optimizing for speed it should never be slower to use
10676 AND, and we don't really model 16-bit vs 32-bit insns
10677 here. */
10678 if (speed_p)
10679 *cost += extra_cost->alu.logical;
10680 }
10681 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10682 {
10683 /* We have UXTB/UXTH. */
10684 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10685 if (speed_p)
10686 *cost += extra_cost->alu.extend;
10687 }
10688 else if (GET_MODE (XEXP (x, 0)) != SImode)
10689 {
10690 /* Needs two shifts. It's marginally preferable to use
10691 shifts rather than two BIC instructions as the second
10692 shift may merge with a subsequent insn as a shifter
10693 op. */
10694 *cost = COSTS_N_INSNS (2);
10695 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10696 if (speed_p)
10697 *cost += 2 * extra_cost->alu.shift;
10698 }
10699
10700 /* Widening beyond 32-bits requires one more insn. */
10701 if (mode == DImode)
10702 {
10703 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10704 }
10705
10706 return true;
10707
10708 case CONST_INT:
10709 *cost = 0;
10710 /* CONST_INT has no mode, so we cannot tell for sure how many
10711 insns are really going to be needed. The best we can do is
10712 look at the value passed. If it fits in SImode, then assume
10713 that's the mode it will be used for. Otherwise assume it
10714 will be used in DImode. */
10715 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10716 mode = SImode;
10717 else
10718 mode = DImode;
10719
10720 /* Avoid blowing up in arm_gen_constant (). */
10721 if (!(outer_code == PLUS
10722 || outer_code == AND
10723 || outer_code == IOR
10724 || outer_code == XOR
10725 || outer_code == MINUS))
10726 outer_code = SET;
10727
10728 const_int_cost:
10729 if (mode == SImode)
10730 {
10731 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10732 INTVAL (x), NULL, NULL,
10733 0, 0));
10734 /* Extra costs? */
10735 }
10736 else
10737 {
10738 *cost += COSTS_N_INSNS (arm_gen_constant
10739 (outer_code, SImode, NULL,
10740 trunc_int_for_mode (INTVAL (x), SImode),
10741 NULL, NULL, 0, 0)
10742 + arm_gen_constant (outer_code, SImode, NULL,
10743 INTVAL (x) >> 32, NULL,
10744 NULL, 0, 0));
10745 /* Extra costs? */
10746 }
10747
10748 return true;
10749
10750 case CONST:
10751 case LABEL_REF:
10752 case SYMBOL_REF:
10753 if (speed_p)
10754 {
10755 if (arm_arch_thumb2 && !flag_pic)
10756 *cost += COSTS_N_INSNS (1);
10757 else
10758 *cost += extra_cost->ldst.load;
10759 }
10760 else
10761 *cost += COSTS_N_INSNS (1);
10762
10763 if (flag_pic)
10764 {
10765 *cost += COSTS_N_INSNS (1);
10766 if (speed_p)
10767 *cost += extra_cost->alu.arith;
10768 }
10769
10770 return true;
10771
10772 case CONST_FIXED:
10773 *cost = COSTS_N_INSNS (4);
10774 /* Fixme. */
10775 return true;
10776
10777 case CONST_DOUBLE:
10778 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10779 && (mode == SFmode || !TARGET_VFP_SINGLE))
10780 {
10781 if (vfp3_const_double_rtx (x))
10782 {
10783 if (speed_p)
10784 *cost += extra_cost->fp[mode == DFmode].fpconst;
10785 return true;
10786 }
10787
10788 if (speed_p)
10789 {
10790 if (mode == DFmode)
10791 *cost += extra_cost->ldst.loadd;
10792 else
10793 *cost += extra_cost->ldst.loadf;
10794 }
10795 else
10796 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10797
10798 return true;
10799 }
10800 *cost = COSTS_N_INSNS (4);
10801 return true;
10802
10803 case CONST_VECTOR:
10804 /* Fixme. */
10805 if (TARGET_NEON
10806 && TARGET_HARD_FLOAT
10807 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10808 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10809 *cost = COSTS_N_INSNS (1);
10810 else
10811 *cost = COSTS_N_INSNS (4);
10812 return true;
10813
10814 case HIGH:
10815 case LO_SUM:
10816 /* When optimizing for size, we prefer constant pool entries to
10817 MOVW/MOVT pairs, so bump the cost of these slightly. */
10818 if (!speed_p)
10819 *cost += 1;
10820 return true;
10821
10822 case CLZ:
10823 if (speed_p)
10824 *cost += extra_cost->alu.clz;
10825 return false;
10826
10827 case SMIN:
10828 if (XEXP (x, 1) == const0_rtx)
10829 {
10830 if (speed_p)
10831 *cost += extra_cost->alu.log_shift;
10832 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10833 return true;
10834 }
10835 /* Fall through. */
10836 case SMAX:
10837 case UMIN:
10838 case UMAX:
10839 *cost += COSTS_N_INSNS (1);
10840 return false;
10841
10842 case TRUNCATE:
10843 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10844 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10845 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10846 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10847 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10848 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10849 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10850 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10851 == ZERO_EXTEND))))
10852 {
10853 if (speed_p)
10854 *cost += extra_cost->mult[1].extend;
10855 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10856 ZERO_EXTEND, 0, speed_p)
10857 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10858 ZERO_EXTEND, 0, speed_p));
10859 return true;
10860 }
10861 *cost = LIBCALL_COST (1);
10862 return false;
10863
10864 case UNSPEC_VOLATILE:
10865 case UNSPEC:
10866 return arm_unspec_cost (x, outer_code, speed_p, cost);
10867
10868 case PC:
10869 /* Reading the PC is like reading any other register. Writing it
10870 is more expensive, but we take that into account elsewhere. */
10871 *cost = 0;
10872 return true;
10873
10874 case ZERO_EXTRACT:
10875 /* TODO: Simple zero_extract of bottom bits using AND. */
10876 /* Fall through. */
10877 case SIGN_EXTRACT:
10878 if (arm_arch6
10879 && mode == SImode
10880 && CONST_INT_P (XEXP (x, 1))
10881 && CONST_INT_P (XEXP (x, 2)))
10882 {
10883 if (speed_p)
10884 *cost += extra_cost->alu.bfx;
10885 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10886 return true;
10887 }
10888 /* Without UBFX/SBFX, need to resort to shift operations. */
10889 *cost += COSTS_N_INSNS (1);
10890 if (speed_p)
10891 *cost += 2 * extra_cost->alu.shift;
10892 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10893 return true;
10894
10895 case FLOAT_EXTEND:
10896 if (TARGET_HARD_FLOAT)
10897 {
10898 if (speed_p)
10899 *cost += extra_cost->fp[mode == DFmode].widen;
10900 if (!TARGET_VFP5
10901 && GET_MODE (XEXP (x, 0)) == HFmode)
10902 {
10903 /* Pre v8, widening HF->DF is a two-step process, first
10904 widening to SFmode. */
10905 *cost += COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->fp[0].widen;
10908 }
10909 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10910 return true;
10911 }
10912
10913 *cost = LIBCALL_COST (1);
10914 return false;
10915
10916 case FLOAT_TRUNCATE:
10917 if (TARGET_HARD_FLOAT)
10918 {
10919 if (speed_p)
10920 *cost += extra_cost->fp[mode == DFmode].narrow;
10921 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10922 return true;
10923 /* Vector modes? */
10924 }
10925 *cost = LIBCALL_COST (1);
10926 return false;
10927
10928 case FMA:
10929 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10930 {
10931 rtx op0 = XEXP (x, 0);
10932 rtx op1 = XEXP (x, 1);
10933 rtx op2 = XEXP (x, 2);
10934
10935
10936 /* vfms or vfnma. */
10937 if (GET_CODE (op0) == NEG)
10938 op0 = XEXP (op0, 0);
10939
10940 /* vfnms or vfnma. */
10941 if (GET_CODE (op2) == NEG)
10942 op2 = XEXP (op2, 0);
10943
10944 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10945 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10946 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10947
10948 if (speed_p)
10949 *cost += extra_cost->fp[mode == DFmode].fma;
10950
10951 return true;
10952 }
10953
10954 *cost = LIBCALL_COST (3);
10955 return false;
10956
10957 case FIX:
10958 case UNSIGNED_FIX:
10959 if (TARGET_HARD_FLOAT)
10960 {
10961 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10962 a vcvt fixed-point conversion. */
10963 if (code == FIX && mode == SImode
10964 && GET_CODE (XEXP (x, 0)) == FIX
10965 && GET_MODE (XEXP (x, 0)) == SFmode
10966 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10967 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10968 > 0)
10969 {
10970 if (speed_p)
10971 *cost += extra_cost->fp[0].toint;
10972
10973 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10974 code, 0, speed_p);
10975 return true;
10976 }
10977
10978 if (GET_MODE_CLASS (mode) == MODE_INT)
10979 {
10980 mode = GET_MODE (XEXP (x, 0));
10981 if (speed_p)
10982 *cost += extra_cost->fp[mode == DFmode].toint;
10983 /* Strip off the 'cost' of rounding towards zero. */
10984 if (GET_CODE (XEXP (x, 0)) == FIX)
10985 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10986 0, speed_p);
10987 else
10988 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10989 /* ??? Increase the cost to deal with transferring from
10990 FP -> CORE registers? */
10991 return true;
10992 }
10993 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10994 && TARGET_VFP5)
10995 {
10996 if (speed_p)
10997 *cost += extra_cost->fp[mode == DFmode].roundint;
10998 return false;
10999 }
11000 /* Vector costs? */
11001 }
11002 *cost = LIBCALL_COST (1);
11003 return false;
11004
11005 case FLOAT:
11006 case UNSIGNED_FLOAT:
11007 if (TARGET_HARD_FLOAT)
11008 {
11009 /* ??? Increase the cost to deal with transferring from CORE
11010 -> FP registers? */
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].fromint;
11013 return false;
11014 }
11015 *cost = LIBCALL_COST (1);
11016 return false;
11017
11018 case CALL:
11019 return true;
11020
11021 case ASM_OPERANDS:
11022 {
11023 /* Just a guess. Guess number of instructions in the asm
11024 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11025 though (see PR60663). */
11026 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11027 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11028
11029 *cost = COSTS_N_INSNS (asm_length + num_operands);
11030 return true;
11031 }
11032 default:
11033 if (mode != VOIDmode)
11034 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11035 else
11036 *cost = COSTS_N_INSNS (4); /* Who knows? */
11037 return false;
11038 }
11039 }
11040
11041 #undef HANDLE_NARROW_SHIFT_ARITH
11042
11043 /* RTX costs entry point. */
11044
11045 static bool
11046 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11047 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11048 {
11049 bool result;
11050 int code = GET_CODE (x);
11051 gcc_assert (current_tune->insn_extra_cost);
11052
11053 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11054 (enum rtx_code) outer_code,
11055 current_tune->insn_extra_cost,
11056 total, speed);
11057
11058 if (dump_file && arm_verbose_cost)
11059 {
11060 print_rtl_single (dump_file, x);
11061 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11062 *total, result ? "final" : "partial");
11063 }
11064 return result;
11065 }
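/* Editorial aside, deliberately excluded from the build: a minimal sketch of
   how the cost hook above is consulted through the generic rtx_cost and
   set_src_cost wrappers, assuming the usual COSTS_N_INSNS scaling
   (COSTS_N_INSNS (1) == 4).  The function name below is hypothetical and not
   part of GCC.  */
#if 0
static void
arm_rtx_costs_usage_sketch (void)
{
  /* Cost of materializing a constant into an SImode register.  The
     CONST_INT case above asks arm_gen_constant how many insns the value
     needs, so multi-insn constants report a correspondingly scaled cost.  */
  rtx c = GEN_INT (0x12345);
  int load_cost = set_src_cost (c, SImode, /*speed_p=*/true);

  /* The same constant queried as the second operand of an addition;
     OUTER_CODE lets the cost code pick the cheapest way of forming the
     constant in that context.  */
  int add_cost = rtx_cost (c, SImode, PLUS, 1, /*speed_p=*/true);

  gcc_assert (load_cost >= 0 && add_cost >= 0);
}
#endif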
11066
11067 /* All address computations that can be done are free, but rtx cost returns
11068 the same for practically all of them. So we weight the different types
11069 of address here in the order (most pref first):
11070 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
11071 static inline int
11072 arm_arm_address_cost (rtx x)
11073 {
11074 enum rtx_code c = GET_CODE (x);
11075
11076 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11077 return 0;
11078 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11079 return 10;
11080
11081 if (c == PLUS)
11082 {
11083 if (CONST_INT_P (XEXP (x, 1)))
11084 return 2;
11085
11086 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11087 return 3;
11088
11089 return 4;
11090 }
11091
11092 return 6;
11093 }
11094
11095 static inline int
11096 arm_thumb_address_cost (rtx x)
11097 {
11098 enum rtx_code c = GET_CODE (x);
11099
11100 if (c == REG)
11101 return 1;
11102 if (c == PLUS
11103 && REG_P (XEXP (x, 0))
11104 && CONST_INT_P (XEXP (x, 1)))
11105 return 1;
11106
11107 return 2;
11108 }
11109
11110 static int
11111 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11112 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11113 {
11114 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11115 }
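/* Editorial aside, excluded from the build: a small sketch of the relative
   weights arm_arm_address_cost assigns to a few common ARM-state addresses,
   derived directly from the function above.  Register numbers are arbitrary
   and the function name is hypothetical.  */
#if 0
static void
arm_address_cost_sketch (void)
{
  rtx r0 = gen_rtx_REG (SImode, 0);
  rtx r1 = gen_rtx_REG (SImode, 1);

  rtx post_inc  = gen_rtx_POST_INC (SImode, r0);                /* cost 0 */
  rtx reg_imm   = gen_rtx_PLUS (SImode, r0, GEN_INT (8));       /* cost 2 */
  rtx reg_shift = gen_rtx_PLUS (SImode,
                                gen_rtx_ASHIFT (SImode, r1, GEN_INT (2)),
                                r0);                            /* cost 3 */
  rtx reg_reg   = gen_rtx_PLUS (SImode, r0, r1);                /* cost 4 */

  gcc_assert (arm_arm_address_cost (post_inc) < arm_arm_address_cost (reg_imm)
              && arm_arm_address_cost (reg_imm) < arm_arm_address_cost (reg_shift)
              && arm_arm_address_cost (reg_shift) < arm_arm_address_cost (reg_reg)
              && arm_arm_address_cost (reg_reg) < arm_arm_address_cost (r0));
}
#endif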
11116
11117 /* Adjust cost hook for XScale. */
11118 static bool
11119 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11120 int * cost)
11121 {
11122 /* Some true dependencies can have a higher cost depending
11123 on precisely how certain input operands are used. */
11124 if (dep_type == 0
11125 && recog_memoized (insn) >= 0
11126 && recog_memoized (dep) >= 0)
11127 {
11128 int shift_opnum = get_attr_shift (insn);
11129 enum attr_type attr_type = get_attr_type (dep);
11130
11131 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11132 operand for INSN. If we have a shifted input operand and the
11133 instruction we depend on is another ALU instruction, then we may
11134 have to account for an additional stall. */
11135 if (shift_opnum != 0
11136 && (attr_type == TYPE_ALU_SHIFT_IMM
11137 || attr_type == TYPE_ALUS_SHIFT_IMM
11138 || attr_type == TYPE_LOGIC_SHIFT_IMM
11139 || attr_type == TYPE_LOGICS_SHIFT_IMM
11140 || attr_type == TYPE_ALU_SHIFT_REG
11141 || attr_type == TYPE_ALUS_SHIFT_REG
11142 || attr_type == TYPE_LOGIC_SHIFT_REG
11143 || attr_type == TYPE_LOGICS_SHIFT_REG
11144 || attr_type == TYPE_MOV_SHIFT
11145 || attr_type == TYPE_MVN_SHIFT
11146 || attr_type == TYPE_MOV_SHIFT_REG
11147 || attr_type == TYPE_MVN_SHIFT_REG))
11148 {
11149 rtx shifted_operand;
11150 int opno;
11151
11152 /* Get the shifted operand. */
11153 extract_insn (insn);
11154 shifted_operand = recog_data.operand[shift_opnum];
11155
11156 /* Iterate over all the operands in DEP. If we write an operand
11157 that overlaps with SHIFTED_OPERAND, then we have to increase the
11158 cost of this dependency. */
11159 extract_insn (dep);
11160 preprocess_constraints (dep);
11161 for (opno = 0; opno < recog_data.n_operands; opno++)
11162 {
11163 /* We can ignore strict inputs. */
11164 if (recog_data.operand_type[opno] == OP_IN)
11165 continue;
11166
11167 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11168 shifted_operand))
11169 {
11170 *cost = 2;
11171 return false;
11172 }
11173 }
11174 }
11175 }
11176 return true;
11177 }
11178
11179 /* Adjust cost hook for Cortex A9. */
11180 static bool
11181 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11182 int * cost)
11183 {
11184 switch (dep_type)
11185 {
11186 case REG_DEP_ANTI:
11187 *cost = 0;
11188 return false;
11189
11190 case REG_DEP_TRUE:
11191 case REG_DEP_OUTPUT:
11192 if (recog_memoized (insn) >= 0
11193 && recog_memoized (dep) >= 0)
11194 {
11195 if (GET_CODE (PATTERN (insn)) == SET)
11196 {
11197 if (GET_MODE_CLASS
11198 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11199 || GET_MODE_CLASS
11200 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11201 {
11202 enum attr_type attr_type_insn = get_attr_type (insn);
11203 enum attr_type attr_type_dep = get_attr_type (dep);
11204
11205 /* By default all dependencies of the form
11206 s0 = s0 <op> s1
11207 s0 = s0 <op> s2
11208 have an extra latency of 1 cycle because
11209 of the input and output dependency in this
11210 case. However this gets modeled as a true
11211 dependency and hence all these checks. */
11212 if (REG_P (SET_DEST (PATTERN (insn)))
11213 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11214 {
11215 /* FMACS is a special case where the dependent
11216 instruction can be issued 3 cycles before
11217 the normal latency in case of an output
11218 dependency. */
11219 if ((attr_type_insn == TYPE_FMACS
11220 || attr_type_insn == TYPE_FMACD)
11221 && (attr_type_dep == TYPE_FMACS
11222 || attr_type_dep == TYPE_FMACD))
11223 {
11224 if (dep_type == REG_DEP_OUTPUT)
11225 *cost = insn_default_latency (dep) - 3;
11226 else
11227 *cost = insn_default_latency (dep);
11228 return false;
11229 }
11230 else
11231 {
11232 if (dep_type == REG_DEP_OUTPUT)
11233 *cost = insn_default_latency (dep) + 1;
11234 else
11235 *cost = insn_default_latency (dep);
11236 }
11237 return false;
11238 }
11239 }
11240 }
11241 }
11242 break;
11243
11244 default:
11245 gcc_unreachable ();
11246 }
11247
11248 return true;
11249 }
11250
11251 /* Adjust cost hook for FA726TE. */
11252 static bool
11253 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11254 int * cost)
11255 {
11256 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11257 by a predicated one) has a penalty of 3. */
11258 if (dep_type == REG_DEP_TRUE
11259 && recog_memoized (insn) >= 0
11260 && recog_memoized (dep) >= 0
11261 && get_attr_conds (dep) == CONDS_SET)
11262 {
11263 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11264 if (get_attr_conds (insn) == CONDS_USE
11265 && get_attr_type (insn) != TYPE_BRANCH)
11266 {
11267 *cost = 3;
11268 return false;
11269 }
11270
11271 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11272 || get_attr_conds (insn) == CONDS_USE)
11273 {
11274 *cost = 0;
11275 return false;
11276 }
11277 }
11278
11279 return true;
11280 }
11281
11282 /* Implement TARGET_REGISTER_MOVE_COST.
11283
11284 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11285 such a move is typically more expensive than a single memory access. We set
11286 the cost to less than two memory accesses so that floating
11287 point to integer conversion does not go through memory. */
11288
11289 int
11290 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11291 reg_class_t from, reg_class_t to)
11292 {
11293 if (TARGET_32BIT)
11294 {
11295 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11296 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11297 return 15;
11298 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11299 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11300 return 4;
11301 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11302 return 20;
11303 else
11304 return 2;
11305 }
11306 else
11307 {
11308 if (from == HI_REGS || to == HI_REGS)
11309 return 4;
11310 else
11311 return 2;
11312 }
11313 }
11314
11315 /* Implement TARGET_MEMORY_MOVE_COST. */
11316
11317 int
11318 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11319 bool in ATTRIBUTE_UNUSED)
11320 {
11321 if (TARGET_32BIT)
11322 return 10;
11323 else
11324 {
11325 if (GET_MODE_SIZE (mode) < 4)
11326 return 8;
11327 else
11328 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11329 }
11330 }
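/* Editorial aside, excluded from the build: worked instances of the Thumb-1
   branch of the formula above (the 32-bit path simply returns 10).  The
   helper mirrors the formula rather than calling the hook, so it does not
   depend on the selected target; both names are hypothetical.  */
#if 0
static int
thumb1_memory_move_cost_model (int mode_size, bool lo_regs_p)
{
  /* Mirrors the !TARGET_32BIT branch of arm_memory_move_cost.  */
  if (mode_size < 4)
    return 8;
  return 2 * mode_size * (lo_regs_p ? 1 : 2);
}

static void
thumb1_memory_move_cost_examples (void)
{
  gcc_assert (thumb1_memory_move_cost_model (1, true) == 8);   /* QImode.  */
  gcc_assert (thumb1_memory_move_cost_model (4, true) == 8);   /* SImode, LO_REGS.  */
  gcc_assert (thumb1_memory_move_cost_model (8, true) == 16);  /* DImode, LO_REGS.  */
  gcc_assert (thumb1_memory_move_cost_model (8, false) == 32); /* DImode, other classes.  */
}
#endif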
11331
11332 /* Vectorizer cost model implementation. */
11333
11334 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11335 static int
11336 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11337 tree vectype,
11338 int misalign ATTRIBUTE_UNUSED)
11339 {
11340 unsigned elements;
11341
11342 switch (type_of_cost)
11343 {
11344 case scalar_stmt:
11345 return current_tune->vec_costs->scalar_stmt_cost;
11346
11347 case scalar_load:
11348 return current_tune->vec_costs->scalar_load_cost;
11349
11350 case scalar_store:
11351 return current_tune->vec_costs->scalar_store_cost;
11352
11353 case vector_stmt:
11354 return current_tune->vec_costs->vec_stmt_cost;
11355
11356 case vector_load:
11357 return current_tune->vec_costs->vec_align_load_cost;
11358
11359 case vector_store:
11360 return current_tune->vec_costs->vec_store_cost;
11361
11362 case vec_to_scalar:
11363 return current_tune->vec_costs->vec_to_scalar_cost;
11364
11365 case scalar_to_vec:
11366 return current_tune->vec_costs->scalar_to_vec_cost;
11367
11368 case unaligned_load:
11369 case vector_gather_load:
11370 return current_tune->vec_costs->vec_unalign_load_cost;
11371
11372 case unaligned_store:
11373 case vector_scatter_store:
11374 return current_tune->vec_costs->vec_unalign_store_cost;
11375
11376 case cond_branch_taken:
11377 return current_tune->vec_costs->cond_taken_branch_cost;
11378
11379 case cond_branch_not_taken:
11380 return current_tune->vec_costs->cond_not_taken_branch_cost;
11381
11382 case vec_perm:
11383 case vec_promote_demote:
11384 return current_tune->vec_costs->vec_stmt_cost;
11385
11386 case vec_construct:
11387 elements = TYPE_VECTOR_SUBPARTS (vectype);
11388 return elements / 2 + 1;
11389
11390 default:
11391 gcc_unreachable ();
11392 }
11393 }
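/* Editorial aside, excluded from the build: the only entry above that is not
   a straight table lookup is vec_construct, whose cost is ELEMENTS / 2 + 1.
   A few worked values, using a hypothetical helper that mirrors that
   expression.  */
#if 0
static unsigned
arm_vec_construct_cost_model (unsigned elements)
{
  return elements / 2 + 1;
}

static void
arm_vec_construct_cost_examples (void)
{
  gcc_assert (arm_vec_construct_cost_model (2) == 2);   /* e.g. V2SImode.  */
  gcc_assert (arm_vec_construct_cost_model (4) == 3);   /* e.g. V4SImode.  */
  gcc_assert (arm_vec_construct_cost_model (16) == 9);  /* e.g. V16QImode.  */
}
#endif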
11394
11395 /* Implement targetm.vectorize.add_stmt_cost. */
11396
11397 static unsigned
11398 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11399 struct _stmt_vec_info *stmt_info, int misalign,
11400 enum vect_cost_model_location where)
11401 {
11402 unsigned *cost = (unsigned *) data;
11403 unsigned retval = 0;
11404
11405 if (flag_vect_cost_model)
11406 {
11407 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11408 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11409
11410 /* Statements in an inner loop relative to the loop being
11411 vectorized are weighted more heavily. The value here is
11412 arbitrary and could potentially be improved with analysis. */
11413 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11414 count *= 50; /* FIXME. */
11415
11416 retval = (unsigned) (count * stmt_cost);
11417 cost[where] += retval;
11418 }
11419
11420 return retval;
11421 }
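/* Editorial aside, excluded from the build: a sketch of how the hook above
   accumulates costs into the per-location buckets when the vectorizer cost
   model is enabled.  A null stmt_info is accepted; the inner-loop scaling by
   50 only triggers for statements with a stmt_info that lies in an inner
   loop.  The function name is hypothetical.  */
#if 0
static void
arm_add_stmt_cost_sketch (void)
{
  unsigned costs[3] = { 0, 0, 0 };   /* prologue, body, epilogue buckets.  */

  /* Two copies of a generic vector statement in the loop body add
     2 * vec_stmt_cost to the body bucket.  */
  arm_add_stmt_cost (costs, 2, vector_stmt, NULL, 0, vect_body);

  /* A guard branch in the prologue is costed with the branch entries.  */
  arm_add_stmt_cost (costs, 1, cond_branch_taken, NULL, 0, vect_prologue);
}
#endif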
11422
11423 /* Return true if and only if this insn can dual-issue only as older. */
11424 static bool
11425 cortexa7_older_only (rtx_insn *insn)
11426 {
11427 if (recog_memoized (insn) < 0)
11428 return false;
11429
11430 switch (get_attr_type (insn))
11431 {
11432 case TYPE_ALU_DSP_REG:
11433 case TYPE_ALU_SREG:
11434 case TYPE_ALUS_SREG:
11435 case TYPE_LOGIC_REG:
11436 case TYPE_LOGICS_REG:
11437 case TYPE_ADC_REG:
11438 case TYPE_ADCS_REG:
11439 case TYPE_ADR:
11440 case TYPE_BFM:
11441 case TYPE_REV:
11442 case TYPE_MVN_REG:
11443 case TYPE_SHIFT_IMM:
11444 case TYPE_SHIFT_REG:
11445 case TYPE_LOAD_BYTE:
11446 case TYPE_LOAD_4:
11447 case TYPE_STORE_4:
11448 case TYPE_FFARITHS:
11449 case TYPE_FADDS:
11450 case TYPE_FFARITHD:
11451 case TYPE_FADDD:
11452 case TYPE_FMOV:
11453 case TYPE_F_CVT:
11454 case TYPE_FCMPS:
11455 case TYPE_FCMPD:
11456 case TYPE_FCONSTS:
11457 case TYPE_FCONSTD:
11458 case TYPE_FMULS:
11459 case TYPE_FMACS:
11460 case TYPE_FMULD:
11461 case TYPE_FMACD:
11462 case TYPE_FDIVS:
11463 case TYPE_FDIVD:
11464 case TYPE_F_MRC:
11465 case TYPE_F_MRRC:
11466 case TYPE_F_FLAG:
11467 case TYPE_F_LOADS:
11468 case TYPE_F_STORES:
11469 return true;
11470 default:
11471 return false;
11472 }
11473 }
11474
11475 /* Return true if and only if this insn can dual-issue as younger. */
11476 static bool
11477 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11478 {
11479 if (recog_memoized (insn) < 0)
11480 {
11481 if (verbose > 5)
11482 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11483 return false;
11484 }
11485
11486 switch (get_attr_type (insn))
11487 {
11488 case TYPE_ALU_IMM:
11489 case TYPE_ALUS_IMM:
11490 case TYPE_LOGIC_IMM:
11491 case TYPE_LOGICS_IMM:
11492 case TYPE_EXTEND:
11493 case TYPE_MVN_IMM:
11494 case TYPE_MOV_IMM:
11495 case TYPE_MOV_REG:
11496 case TYPE_MOV_SHIFT:
11497 case TYPE_MOV_SHIFT_REG:
11498 case TYPE_BRANCH:
11499 case TYPE_CALL:
11500 return true;
11501 default:
11502 return false;
11503 }
11504 }
11505
11506
11507 /* Look for an instruction that can dual issue only as an older
11508 instruction, and move it in front of any instructions that can
11509 dual-issue as younger, while preserving the relative order of all
11510 other instructions in the ready list. This is a heuristic to help
11511 dual-issue in later cycles, by postponing issue of more flexible
11512 instructions. This heuristic may affect dual issue opportunities
11513 in the current cycle. */
11514 static void
11515 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11516 int *n_readyp, int clock)
11517 {
11518 int i;
11519 int first_older_only = -1, first_younger = -1;
11520
11521 if (verbose > 5)
11522 fprintf (file,
11523 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11524 clock,
11525 *n_readyp);
11526
11527 /* Traverse the ready list from the head (the instruction to issue
11528 first), looking for the first instruction that can issue as
11529 younger and the first instruction that can dual-issue only as
11530 older. */
11531 for (i = *n_readyp - 1; i >= 0; i--)
11532 {
11533 rtx_insn *insn = ready[i];
11534 if (cortexa7_older_only (insn))
11535 {
11536 first_older_only = i;
11537 if (verbose > 5)
11538 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11539 break;
11540 }
11541 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11542 first_younger = i;
11543 }
11544
11545 /* Nothing to reorder because either no younger insn was found, or an insn
11546 that can dual-issue only as older appears before any insn that
11547 can dual-issue as younger. */
11548 if (first_younger == -1)
11549 {
11550 if (verbose > 5)
11551 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11552 return;
11553 }
11554
11555 /* Nothing to reorder because no older-only insn in the ready list. */
11556 if (first_older_only == -1)
11557 {
11558 if (verbose > 5)
11559 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11560 return;
11561 }
11562
11563 /* Move first_older_only insn before first_younger. */
11564 if (verbose > 5)
11565 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11566 INSN_UID(ready [first_older_only]),
11567 INSN_UID(ready [first_younger]));
11568 rtx_insn *first_older_only_insn = ready [first_older_only];
11569 for (i = first_older_only; i < first_younger; i++)
11570 {
11571 ready[i] = ready[i+1];
11572 }
11573
11574 ready[i] = first_older_only_insn;
11575 return;
11576 }
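/* Editorial aside, excluded from the build: the rotation above, replayed on a
   plain array.  Index N_READY - 1 is the head of the ready list (it issues
   first); a negative value stands for an older-only insn, positive values for
   insns that can also issue as younger.  The function name is hypothetical.  */
#if 0
static void
cortexa7_sched_reorder_example (void)
{
  int ready[4] = { 10, -1, 20, 30 };   /* 30 would issue first.  */
  int first_older_only = 1, first_younger = 3, i;

  /* Same shuffle as cortexa7_sched_reorder performs on the rtx_insn array.  */
  int older = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = older;

  /* The older-only insn now issues first and the relative order of the
     remaining insns is preserved: { 10, 20, 30, -1 }.  */
  gcc_assert (ready[3] == -1 && ready[2] == 30 && ready[1] == 20
              && ready[0] == 10);
}
#endif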
11577
11578 /* Implement TARGET_SCHED_REORDER. */
11579 static int
11580 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11581 int clock)
11582 {
11583 switch (arm_tune)
11584 {
11585 case TARGET_CPU_cortexa7:
11586 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11587 break;
11588 default:
11589 /* Do nothing for other cores. */
11590 break;
11591 }
11592
11593 return arm_issue_rate ();
11594 }
11595
11596 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11597 It corrects the value of COST based on the relationship between
11598 INSN and DEP and the dependence type DEP_TYPE. It returns the new
11599 value. There is a per-core adjust_cost hook to adjust scheduler costs
11600 and the per-core hook can choose to completely override the generic
11601 adjust_cost function. Only put bits of code into arm_adjust_cost that
11602 are common across all cores. */
11603 static int
11604 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11605 unsigned int)
11606 {
11607 rtx i_pat, d_pat;
11608
11609 /* When generating Thumb-1 code, we want to place flag-setting operations
11610 close to a conditional branch which depends on them, so that we can
11611 omit the comparison. */
11612 if (TARGET_THUMB1
11613 && dep_type == 0
11614 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11615 && recog_memoized (dep) >= 0
11616 && get_attr_conds (dep) == CONDS_SET)
11617 return 0;
11618
11619 if (current_tune->sched_adjust_cost != NULL)
11620 {
11621 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11622 return cost;
11623 }
11624
11625 /* XXX Is this strictly true? */
11626 if (dep_type == REG_DEP_ANTI
11627 || dep_type == REG_DEP_OUTPUT)
11628 return 0;
11629
11630 /* Call insns don't incur a stall, even if they follow a load. */
11631 if (dep_type == 0
11632 && CALL_P (insn))
11633 return 1;
11634
11635 if ((i_pat = single_set (insn)) != NULL
11636 && MEM_P (SET_SRC (i_pat))
11637 && (d_pat = single_set (dep)) != NULL
11638 && MEM_P (SET_DEST (d_pat)))
11639 {
11640 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11641 /* This is a load after a store, there is no conflict if the load reads
11642 from a cached area. Assume that loads from the stack, and from the
11643 constant pool are cached, and that others will miss. This is a
11644 hack. */
11645
11646 if ((GET_CODE (src_mem) == SYMBOL_REF
11647 && CONSTANT_POOL_ADDRESS_P (src_mem))
11648 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11649 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11650 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11651 return 1;
11652 }
11653
11654 return cost;
11655 }
11656
11657 int
11658 arm_max_conditional_execute (void)
11659 {
11660 return max_insns_skipped;
11661 }
11662
11663 static int
11664 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11665 {
11666 if (TARGET_32BIT)
11667 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11668 else
11669 return (optimize > 0) ? 2 : 0;
11670 }
11671
11672 static int
11673 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11674 {
11675 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11676 }
11677
11678 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11679 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11680 sequences of non-executed instructions in IT blocks probably take the same
11681 amount of time as executed instructions (and the IT instruction itself takes
11682 space in icache). This function was experimentally determined to give good
11683 results on a popular embedded benchmark. */
11684
11685 static int
11686 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11687 {
11688 return (TARGET_32BIT && speed_p) ? 1
11689 : arm_default_branch_cost (speed_p, predictable_p);
11690 }
11691
11692 static int
11693 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11694 {
11695 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11696 }
11697
11698 static bool fp_consts_inited = false;
11699
11700 static REAL_VALUE_TYPE value_fp0;
11701
11702 static void
11703 init_fp_table (void)
11704 {
11705 REAL_VALUE_TYPE r;
11706
11707 r = REAL_VALUE_ATOF ("0", DFmode);
11708 value_fp0 = r;
11709 fp_consts_inited = true;
11710 }
11711
11712 /* Return TRUE if rtx X is a valid immediate FP constant. */
11713 int
11714 arm_const_double_rtx (rtx x)
11715 {
11716 const REAL_VALUE_TYPE *r;
11717
11718 if (!fp_consts_inited)
11719 init_fp_table ();
11720
11721 r = CONST_DOUBLE_REAL_VALUE (x);
11722 if (REAL_VALUE_MINUS_ZERO (*r))
11723 return 0;
11724
11725 if (real_equal (r, &value_fp0))
11726 return 1;
11727
11728 return 0;
11729 }
11730
11731 /* VFPv3 has a fairly wide range of representable immediates, formed from
11732 "quarter-precision" floating-point values. These can be evaluated using this
11733 formula (with ^ for exponentiation):
11734
11735 -1^s * n * 2^-r
11736
11737 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11738 16 <= n <= 31 and 0 <= r <= 7.
11739
11740 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11741
11742 - A (most-significant) is the sign bit.
11743 - BCD are the exponent (encoded as r XOR 3).
11744 - EFGH are the mantissa (encoded as n - 16).
11745 */
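/* Editorial aside, excluded from the build: a worked example of the encoding
   described above.  The helper rebuilds the 8-bit fconst[sd] immediate
   straight from the (s, n, r) decomposition; the spot-checked values follow
   from the formula (e.g. 1.0 = 16 * 2^-4).  Both function names are
   hypothetical.  */
#if 0
static unsigned
vfp3_quarter_precision_encode (int sign, int n, int r)
{
  /* value = -1^sign * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7.  */
  gcc_assert (n >= 16 && n <= 31 && r >= 0 && r <= 7);
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}

static void
vfp3_quarter_precision_examples (void)
{
  gcc_assert (vfp3_quarter_precision_encode (0, 16, 4) == 0x70);  /* 1.0  */
  gcc_assert (vfp3_quarter_precision_encode (0, 16, 5) == 0x60);  /* 0.5  */
  gcc_assert (vfp3_quarter_precision_encode (1, 24, 4) == 0xf8);  /* -1.5 */
}
#endif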
11746
11747 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11748 fconst[sd] instruction, or -1 if X isn't suitable. */
11749 static int
11750 vfp3_const_double_index (rtx x)
11751 {
11752 REAL_VALUE_TYPE r, m;
11753 int sign, exponent;
11754 unsigned HOST_WIDE_INT mantissa, mant_hi;
11755 unsigned HOST_WIDE_INT mask;
11756 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11757 bool fail;
11758
11759 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11760 return -1;
11761
11762 r = *CONST_DOUBLE_REAL_VALUE (x);
11763
11764 /* We can't represent these things, so detect them first. */
11765 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11766 return -1;
11767
11768 /* Extract sign, exponent and mantissa. */
11769 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11770 r = real_value_abs (&r);
11771 exponent = REAL_EXP (&r);
11772 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11773 highest (sign) bit, with a fixed binary point at bit point_pos.
11774 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11775 bits for the mantissa, this may fail (low bits would be lost). */
11776 real_ldexp (&m, &r, point_pos - exponent);
11777 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11778 mantissa = w.elt (0);
11779 mant_hi = w.elt (1);
11780
11781 /* If there are bits set in the low part of the mantissa, we can't
11782 represent this value. */
11783 if (mantissa != 0)
11784 return -1;
11785
11786 /* Now make it so that mantissa contains the most-significant bits, and move
11787 the point_pos to indicate that the least-significant bits have been
11788 discarded. */
11789 point_pos -= HOST_BITS_PER_WIDE_INT;
11790 mantissa = mant_hi;
11791
11792 /* We can permit four significant bits of mantissa only, plus a high bit
11793 which is always 1. */
11794 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11795 if ((mantissa & mask) != 0)
11796 return -1;
11797
11798 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11799 mantissa >>= point_pos - 5;
11800
11801 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11802 floating-point immediate zero with Neon using an integer-zero load, but
11803 that case is handled elsewhere.) */
11804 if (mantissa == 0)
11805 return -1;
11806
11807 gcc_assert (mantissa >= 16 && mantissa <= 31);
11808
11809 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11810 normalized significands are in the range [1, 2). (Our mantissa is shifted
11811 left 4 places at this point relative to normalized IEEE754 values). GCC
11812 internally uses [0.5, 1) (see real.c), so the exponent returned from
11813 REAL_EXP must be altered. */
11814 exponent = 5 - exponent;
11815
11816 if (exponent < 0 || exponent > 7)
11817 return -1;
11818
11819 /* Sign, mantissa and exponent are now in the correct form to plug into the
11820 formula described in the comment above. */
11821 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11822 }
11823
11824 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11825 int
11826 vfp3_const_double_rtx (rtx x)
11827 {
11828 if (!TARGET_VFP3)
11829 return 0;
11830
11831 return vfp3_const_double_index (x) != -1;
11832 }
11833
11834 /* Recognize immediates which can be used in various Neon instructions. Legal
11835 immediates are described by the following table (for VMVN variants, the
11836 bitwise inverse of the constant shown is recognized. In either case, VMOV
11837 is output and the correct instruction to use for a given constant is chosen
11838 by the assembler). The constant shown is replicated across all elements of
11839 the destination vector.
11840
11841 insn elems variant constant (binary)
11842 ---- ----- ------- -----------------
11843 vmov i32 0 00000000 00000000 00000000 abcdefgh
11844 vmov i32 1 00000000 00000000 abcdefgh 00000000
11845 vmov i32 2 00000000 abcdefgh 00000000 00000000
11846 vmov i32 3 abcdefgh 00000000 00000000 00000000
11847 vmov i16 4 00000000 abcdefgh
11848 vmov i16 5 abcdefgh 00000000
11849 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11850 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11851 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11852 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11853 vmvn i16 10 00000000 abcdefgh
11854 vmvn i16 11 abcdefgh 00000000
11855 vmov i32 12 00000000 00000000 abcdefgh 11111111
11856 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11857 vmov i32 14 00000000 abcdefgh 11111111 11111111
11858 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11859 vmov i8 16 abcdefgh
11860 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11861 eeeeeeee ffffffff gggggggg hhhhhhhh
11862 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11863 vmov f32 19 00000000 00000000 00000000 00000000
11864
11865 For case 18, B = !b. Representable values are exactly those accepted by
11866 vfp3_const_double_index, but are output as floating-point numbers rather
11867 than indices.
11868
11869 For case 19, we will change it to vmov.i32 when assembling.
11870
11871 Variants 0-5 (inclusive) may also be used as immediates for the second
11872 operand of VORR/VBIC instructions.
11873
11874 The INVERSE argument causes the bitwise inverse of the given operand to be
11875 recognized instead (used for recognizing legal immediates for the VAND/VORN
11876 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11877 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11878 output, rather than the real insns vbic/vorr).
11879
11880 INVERSE makes no difference to the recognition of float vectors.
11881
11882 The return value is the variant of immediate as shown in the above table, or
11883 -1 if the given value doesn't match any of the listed patterns.
11884 */
11885 static int
11886 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11887 rtx *modconst, int *elementwidth)
11888 {
11889 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11890 matches = 1; \
11891 for (i = 0; i < idx; i += (STRIDE)) \
11892 if (!(TEST)) \
11893 matches = 0; \
11894 if (matches) \
11895 { \
11896 immtype = (CLASS); \
11897 elsize = (ELSIZE); \
11898 break; \
11899 }
11900
11901 unsigned int i, elsize = 0, idx = 0, n_elts;
11902 unsigned int innersize;
11903 unsigned char bytes[16];
11904 int immtype = -1, matches;
11905 unsigned int invmask = inverse ? 0xff : 0;
11906 bool vector = GET_CODE (op) == CONST_VECTOR;
11907
11908 if (vector)
11909 n_elts = CONST_VECTOR_NUNITS (op);
11910 else
11911 {
11912 n_elts = 1;
11913 if (mode == VOIDmode)
11914 mode = DImode;
11915 }
11916
11917 innersize = GET_MODE_UNIT_SIZE (mode);
11918
11919 /* Vectors of float constants. */
11920 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11921 {
11922 rtx el0 = CONST_VECTOR_ELT (op, 0);
11923
11924 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11925 return -1;
11926
11927 /* FP16 vectors cannot be represented. */
11928 if (GET_MODE_INNER (mode) == HFmode)
11929 return -1;
11930
11931 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11932 are distinct in this context. */
11933 if (!const_vec_duplicate_p (op))
11934 return -1;
11935
11936 if (modconst)
11937 *modconst = CONST_VECTOR_ELT (op, 0);
11938
11939 if (elementwidth)
11940 *elementwidth = 0;
11941
11942 if (el0 == CONST0_RTX (GET_MODE (el0)))
11943 return 19;
11944 else
11945 return 18;
11946 }
11947
11948 /* The tricks done in the code below apply for little-endian vector layout.
11949 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11950 FIXME: Implement logic for big-endian vectors. */
11951 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11952 return -1;
11953
11954 /* Splat vector constant out into a byte vector. */
11955 for (i = 0; i < n_elts; i++)
11956 {
11957 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11958 unsigned HOST_WIDE_INT elpart;
11959
11960 gcc_assert (CONST_INT_P (el));
11961 elpart = INTVAL (el);
11962
11963 for (unsigned int byte = 0; byte < innersize; byte++)
11964 {
11965 bytes[idx++] = (elpart & 0xff) ^ invmask;
11966 elpart >>= BITS_PER_UNIT;
11967 }
11968 }
11969
11970 /* Sanity check. */
11971 gcc_assert (idx == GET_MODE_SIZE (mode));
11972
11973 do
11974 {
11975 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11976 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11977
11978 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11979 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11980
11981 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11982 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11983
11984 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11985 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11986
11987 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11988
11989 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11990
11991 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11992 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11993
11994 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11995 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11996
11997 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11998 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11999
12000 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12001 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12002
12003 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12004
12005 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12006
12007 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12008 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12009
12010 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12011 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12012
12013 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12014 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12015
12016 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12017 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12018
12019 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12020
12021 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12022 && bytes[i] == bytes[(i + 8) % idx]);
12023 }
12024 while (0);
12025
12026 if (immtype == -1)
12027 return -1;
12028
12029 if (elementwidth)
12030 *elementwidth = elsize;
12031
12032 if (modconst)
12033 {
12034 unsigned HOST_WIDE_INT imm = 0;
12035
12036 /* Un-invert bytes of recognized vector, if necessary. */
12037 if (invmask != 0)
12038 for (i = 0; i < idx; i++)
12039 bytes[i] ^= invmask;
12040
12041 if (immtype == 17)
12042 {
12043 /* FIXME: Broken on 32-bit H_W_I hosts. */
12044 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12045
12046 for (i = 0; i < 8; i++)
12047 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12048 << (i * BITS_PER_UNIT);
12049
12050 *modconst = GEN_INT (imm);
12051 }
12052 else
12053 {
12054 unsigned HOST_WIDE_INT imm = 0;
12055
12056 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12057 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12058
12059 *modconst = GEN_INT (imm);
12060 }
12061 }
12062
12063 return immtype;
12064 #undef CHECK
12065 }
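/* Editorial aside, excluded from the build: a worked instance of the byte
   matching above, using a plain array instead of a CONST_VECTOR.  A V2SImode
   constant with both elements equal to 0x4a splats (little-endian) to the
   bytes below and satisfies the variant-0 test, i.e. it can be emitted as
   "vmov.i32 dN, #0x4a" with element width 32.  The function name is
   hypothetical.  */
#if 0
static void
neon_valid_immediate_example (void)
{
  unsigned char bytes[8] = { 0x4a, 0, 0, 0, 0x4a, 0, 0, 0 };
  unsigned int i, matches = 1;

  /* Variant 0: bytes[i] == bytes[0] && bytes[i + 1..3] == 0, stride 4.  */
  for (i = 0; i < 8; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
          && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      matches = 0;

  gcc_assert (matches);
}
#endif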
12066
12067 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
12068 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12069 float elements), and a modified constant (whatever should be output for a
12070 VMOV) in *MODCONST. */
12071
12072 int
12073 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12074 rtx *modconst, int *elementwidth)
12075 {
12076 rtx tmpconst;
12077 int tmpwidth;
12078 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12079
12080 if (retval == -1)
12081 return 0;
12082
12083 if (modconst)
12084 *modconst = tmpconst;
12085
12086 if (elementwidth)
12087 *elementwidth = tmpwidth;
12088
12089 return 1;
12090 }
12091
12092 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction. If
12093 the immediate is valid, write a constant suitable for using as an operand
12094 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12095 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12096
12097 int
12098 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12099 rtx *modconst, int *elementwidth)
12100 {
12101 rtx tmpconst;
12102 int tmpwidth;
12103 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12104
12105 if (retval < 0 || retval > 5)
12106 return 0;
12107
12108 if (modconst)
12109 *modconst = tmpconst;
12110
12111 if (elementwidth)
12112 *elementwidth = tmpwidth;
12113
12114 return 1;
12115 }
12116
12117 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12118 the immediate is valid, write a constant suitable for using as an operand
12119 to VSHR/VSHL to *MODCONST and the corresponding element width to
12120 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12121 shift, because the two have different range limitations.
12122
12123 int
12124 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12125 rtx *modconst, int *elementwidth,
12126 bool isleftshift)
12127 {
12128 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12129 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12130 unsigned HOST_WIDE_INT last_elt = 0;
12131 unsigned HOST_WIDE_INT maxshift;
12132
12133 /* All vector elements must be the same constant shift amount. */
12134 for (i = 0; i < n_elts; i++)
12135 {
12136 rtx el = CONST_VECTOR_ELT (op, i);
12137 unsigned HOST_WIDE_INT elpart;
12138
12139 if (CONST_INT_P (el))
12140 elpart = INTVAL (el);
12141 else if (CONST_DOUBLE_P (el))
12142 return 0;
12143 else
12144 gcc_unreachable ();
12145
12146 if (i != 0 && elpart != last_elt)
12147 return 0;
12148
12149 last_elt = elpart;
12150 }
12151
12152 /* Shift less than element size. */
12153 maxshift = innersize * 8;
12154
12155 if (isleftshift)
12156 {
12157 /* Left shift immediate value can be from 0 to <size>-1. */
12158 if (last_elt >= maxshift)
12159 return 0;
12160 }
12161 else
12162 {
12163 /* Right shift immediate value can be from 1 to <size>. */
12164 if (last_elt == 0 || last_elt > maxshift)
12165 return 0;
12166 }
12167
12168 if (elementwidth)
12169 *elementwidth = innersize * 8;
12170
12171 if (modconst)
12172 *modconst = CONST_VECTOR_ELT (op, 0);
12173
12174 return 1;
12175 }
12176
12177 /* Return a string suitable for output of Neon immediate logic operation
12178 MNEM. */
12179
12180 char *
12181 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12182 int inverse, int quad)
12183 {
12184 int width, is_valid;
12185 static char templ[40];
12186
12187 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12188
12189 gcc_assert (is_valid != 0);
12190
12191 if (quad)
12192 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12193 else
12194 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12195
12196 return templ;
12197 }
12198
12199 /* Return a string suitable for output of Neon immediate shift operation
12200 (VSHR or VSHL) MNEM. */
12201
12202 char *
12203 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12204 machine_mode mode, int quad,
12205 bool isleftshift)
12206 {
12207 int width, is_valid;
12208 static char templ[40];
12209
12210 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12211 gcc_assert (is_valid != 0);
12212
12213 if (quad)
12214 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12215 else
12216 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12217
12218 return templ;
12219 }
12220
12221 /* Output a sequence of pairwise operations to implement a reduction.
12222 NOTE: We do "too much work" here, because pairwise operations work on two
12223 registers-worth of operands in one go. Unfortunately we can't exploit those
12224 extra calculations to do the full operation in fewer steps, as far as we can tell.
12225 Although all vector elements of the result but the first are ignored, we
12226 actually calculate the same result in each of the elements. An alternative
12227 such as initially loading a vector with zero to use as each of the second
12228 operands would use up an additional register and take an extra instruction,
12229 for no particular gain. */
12230
12231 void
12232 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12233 rtx (*reduc) (rtx, rtx, rtx))
12234 {
12235 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12236 rtx tmpsum = op1;
12237
12238 for (i = parts / 2; i >= 1; i /= 2)
12239 {
12240 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12241 emit_insn (reduc (dest, tmpsum, tmpsum));
12242 tmpsum = dest;
12243 }
12244 }
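/* Editorial aside, excluded from the build: a simplified scalar model of the
   reduction loop above for a four-element vector.  Each step combines
   adjacent pairs; because both source operands are TMPSUM, the second half of
   the result duplicates the first, and after log2(4) = 2 steps lane 0 (the
   only lane the caller uses) holds the full reduction.  Lane handling on real
   NEON pairwise ops differs in detail; the function name is hypothetical.  */
#if 0
static void
neon_pairwise_reduce_model (void)
{
  float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  int parts, j;

  for (parts = 4; parts > 1; parts /= 2)
    {
      float next[4];
      for (j = 0; j < parts / 2; j++)
        next[j] = v[2 * j] + v[2 * j + 1];      /* pairwise add */
      for (j = parts / 2; j < parts; j++)
        next[j] = next[j - parts / 2];          /* duplicated half */
      for (j = 0; j < parts; j++)
        v[j] = next[j];
    }

  gcc_assert (v[0] == 10.0f);   /* 1 + 2 + 3 + 4 */
}
#endif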
12245
12246 /* If VALS is a vector constant that can be loaded into a register
12247 using VDUP, generate instructions to do so and return an RTX to
12248 assign to the register. Otherwise return NULL_RTX. */
12249
12250 static rtx
12251 neon_vdup_constant (rtx vals)
12252 {
12253 machine_mode mode = GET_MODE (vals);
12254 machine_mode inner_mode = GET_MODE_INNER (mode);
12255 rtx x;
12256
12257 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12258 return NULL_RTX;
12259
12260 if (!const_vec_duplicate_p (vals, &x))
12261 /* The elements are not all the same. We could handle repeating
12262 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12263 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12264 vdup.i16). */
12265 return NULL_RTX;
12266
12267 /* We can load this constant by using VDUP and a constant in a
12268 single ARM register. This will be cheaper than a vector
12269 load. */
12270
12271 x = copy_to_mode_reg (inner_mode, x);
12272 return gen_vec_duplicate (mode, x);
12273 }
12274
12275 /* Generate code to load VALS, which is a PARALLEL containing only
12276 constants (for vec_init) or CONST_VECTOR, efficiently into a
12277 register. Returns an RTX to copy into the register, or NULL_RTX
12278 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12279
12280 rtx
12281 neon_make_constant (rtx vals)
12282 {
12283 machine_mode mode = GET_MODE (vals);
12284 rtx target;
12285 rtx const_vec = NULL_RTX;
12286 int n_elts = GET_MODE_NUNITS (mode);
12287 int n_const = 0;
12288 int i;
12289
12290 if (GET_CODE (vals) == CONST_VECTOR)
12291 const_vec = vals;
12292 else if (GET_CODE (vals) == PARALLEL)
12293 {
12294 /* A CONST_VECTOR must contain only CONST_INTs and
12295 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12296 Only store valid constants in a CONST_VECTOR. */
12297 for (i = 0; i < n_elts; ++i)
12298 {
12299 rtx x = XVECEXP (vals, 0, i);
12300 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12301 n_const++;
12302 }
12303 if (n_const == n_elts)
12304 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12305 }
12306 else
12307 gcc_unreachable ();
12308
12309 if (const_vec != NULL
12310 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12311 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12312 return const_vec;
12313 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12314 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12315 pipeline cycle; creating the constant takes one or two ARM
12316 pipeline cycles. */
12317 return target;
12318 else if (const_vec != NULL_RTX)
12319 /* Load from constant pool. On Cortex-A8 this takes two cycles
12320 (for either double or quad vectors). We can not take advantage
12321 of single-cycle VLD1 because we need a PC-relative addressing
12322 mode. */
12323 return const_vec;
12324 else
12325 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12326 We can not construct an initializer. */
12327 return NULL_RTX;
12328 }
12329
12330 /* Initialize vector TARGET to VALS. */
12331
12332 void
12333 neon_expand_vector_init (rtx target, rtx vals)
12334 {
12335 machine_mode mode = GET_MODE (target);
12336 machine_mode inner_mode = GET_MODE_INNER (mode);
12337 int n_elts = GET_MODE_NUNITS (mode);
12338 int n_var = 0, one_var = -1;
12339 bool all_same = true;
12340 rtx x, mem;
12341 int i;
12342
12343 for (i = 0; i < n_elts; ++i)
12344 {
12345 x = XVECEXP (vals, 0, i);
12346 if (!CONSTANT_P (x))
12347 ++n_var, one_var = i;
12348
12349 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12350 all_same = false;
12351 }
12352
12353 if (n_var == 0)
12354 {
12355 rtx constant = neon_make_constant (vals);
12356 if (constant != NULL_RTX)
12357 {
12358 emit_move_insn (target, constant);
12359 return;
12360 }
12361 }
12362
12363 /* Splat a single non-constant element if we can. */
12364 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12365 {
12366 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12367 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12368 return;
12369 }
12370
12371 /* One field is non-constant. Load constant then overwrite varying
12372 field. This is more efficient than using the stack. */
12373 if (n_var == 1)
12374 {
12375 rtx copy = copy_rtx (vals);
12376 rtx index = GEN_INT (one_var);
12377
12378 /* Load constant part of vector, substitute neighboring value for
12379 varying element. */
12380 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12381 neon_expand_vector_init (target, copy);
12382
12383 /* Insert variable. */
12384 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12385 switch (mode)
12386 {
12387 case E_V8QImode:
12388 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12389 break;
12390 case E_V16QImode:
12391 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12392 break;
12393 case E_V4HImode:
12394 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12395 break;
12396 case E_V8HImode:
12397 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12398 break;
12399 case E_V2SImode:
12400 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12401 break;
12402 case E_V4SImode:
12403 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12404 break;
12405 case E_V2SFmode:
12406 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12407 break;
12408 case E_V4SFmode:
12409 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12410 break;
12411 case E_V2DImode:
12412 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12413 break;
12414 default:
12415 gcc_unreachable ();
12416 }
12417 return;
12418 }
12419
12420 /* Construct the vector in memory one field at a time
12421 and load the whole vector. */
12422 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12423 for (i = 0; i < n_elts; i++)
12424 emit_move_insn (adjust_address_nv (mem, inner_mode,
12425 i * GET_MODE_SIZE (inner_mode)),
12426 XVECEXP (vals, 0, i));
12427 emit_move_insn (target, mem);
12428 }
12429
12430 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12431 an error if it doesn't. EXP indicates the source location, which includes the
12432 inlining history for intrinsics. */
12433
12434 static void
12435 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12436 const_tree exp, const char *desc)
12437 {
12438 HOST_WIDE_INT lane;
12439
12440 gcc_assert (CONST_INT_P (operand));
12441
12442 lane = INTVAL (operand);
12443
12444 if (lane < low || lane >= high)
12445 {
12446 if (exp)
12447 error ("%K%s %wd out of range %wd - %wd",
12448 exp, desc, lane, low, high - 1);
12449 else
12450 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12451 }
12452 }
12453
12454 /* Bounds-check lanes. */
12455
12456 void
12457 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12458 const_tree exp)
12459 {
12460 bounds_check (operand, low, high, exp, "lane");
12461 }
12462
12463 /* Bounds-check constants. */
12464
12465 void
12466 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12467 {
12468 bounds_check (operand, low, high, NULL_TREE, "constant");
12469 }
12470
12471 HOST_WIDE_INT
12472 neon_element_bits (machine_mode mode)
12473 {
12474 return GET_MODE_UNIT_BITSIZE (mode);
12475 }
12476
12477 \f
12478 /* Predicates for `match_operand' and `match_operator'. */
12479
12480 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12481 WB is true if full writeback address modes are allowed and is false
12482 if limited writeback address modes (POST_INC and PRE_DEC) are
12483 allowed. */
12484
12485 int
12486 arm_coproc_mem_operand (rtx op, bool wb)
12487 {
12488 rtx ind;
12489
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress || reload_completed || lra_in_progress)
12492 && ( reg_mentioned_p (frame_pointer_rtx, op)
12493 || reg_mentioned_p (arg_pointer_rtx, op)
12494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12498 return FALSE;
12499
12500 /* Constants are converted into offsets from labels. */
12501 if (!MEM_P (op))
12502 return FALSE;
12503
12504 ind = XEXP (op, 0);
12505
12506 if (reload_completed
12507 && (GET_CODE (ind) == LABEL_REF
12508 || (GET_CODE (ind) == CONST
12509 && GET_CODE (XEXP (ind, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12512 return TRUE;
12513
12514 /* Match: (mem (reg)). */
12515 if (REG_P (ind))
12516 return arm_address_register_rtx_p (ind, 0);
12517
12518 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12519 acceptable in any case (subject to verification by
12520 arm_address_register_rtx_p). We need WB to be true to accept
12521 PRE_INC and POST_DEC. */
12522 if (GET_CODE (ind) == POST_INC
12523 || GET_CODE (ind) == PRE_DEC
12524 || (wb
12525 && (GET_CODE (ind) == PRE_INC
12526 || GET_CODE (ind) == POST_DEC)))
12527 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12528
12529 if (wb
12530 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12531 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12532 && GET_CODE (XEXP (ind, 1)) == PLUS
12533 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12534 ind = XEXP (ind, 1);
12535
12536 /* Match:
12537 (plus (reg)
12538 (const)). */
12539 if (GET_CODE (ind) == PLUS
12540 && REG_P (XEXP (ind, 0))
12541 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12542 && CONST_INT_P (XEXP (ind, 1))
12543 && INTVAL (XEXP (ind, 1)) > -1024
12544 && INTVAL (XEXP (ind, 1)) < 1024
12545 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12546 return TRUE;
12547
12548 return FALSE;
12549 }
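
/* Examples of addresses the predicate above accepts (register numbers are
   arbitrary and machine modes are omitted for brevity):

     (mem (reg r4))                             plain register
     (mem (post_inc (reg r4)))                  allowed regardless of WB
     (mem (pre_inc (reg r4)))                   allowed only when WB is true
     (mem (plus (reg r4) (const_int 508)))      offset in (-1024, 1024) and a
                                                multiple of 4

   An offset of 510 is rejected because it is not word-aligned; 1024 is
   rejected because it is out of range.  */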
12550
12551 /* Return TRUE if OP is a memory operand which we can load or store a vector
12552 to/from. TYPE is one of the following values:
12553 0 - Vector load/store (vldr)
12554 1 - Core registers (ldm)
12555 2 - Element/structure loads (vld1)
12556 */
12557 int
12558 neon_vector_mem_operand (rtx op, int type, bool strict)
12559 {
12560 rtx ind;
12561
12562 /* Reject eliminable registers. */
12563 if (strict && ! (reload_in_progress || reload_completed)
12564 && (reg_mentioned_p (frame_pointer_rtx, op)
12565 || reg_mentioned_p (arg_pointer_rtx, op)
12566 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12567 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12568 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12569 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12570 return FALSE;
12571
12572 /* Constants are converted into offsets from labels. */
12573 if (!MEM_P (op))
12574 return FALSE;
12575
12576 ind = XEXP (op, 0);
12577
12578 if (reload_completed
12579 && (GET_CODE (ind) == LABEL_REF
12580 || (GET_CODE (ind) == CONST
12581 && GET_CODE (XEXP (ind, 0)) == PLUS
12582 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12583 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12584 return TRUE;
12585
12586 /* Match: (mem (reg)). */
12587 if (REG_P (ind))
12588 return arm_address_register_rtx_p (ind, 0);
12589
12590 /* Allow post-increment with Neon registers. */
12591 if ((type != 1 && GET_CODE (ind) == POST_INC)
12592 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12593 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12594
12595 /* Allow post-increment by register for VLDn.  */
12596 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12597 && GET_CODE (XEXP (ind, 1)) == PLUS
12598 && REG_P (XEXP (XEXP (ind, 1), 1)))
12599 return true;
12600
12601 /* Match:
12602 (plus (reg)
12603 (const)). */
12604 if (type == 0
12605 && GET_CODE (ind) == PLUS
12606 && REG_P (XEXP (ind, 0))
12607 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12608 && CONST_INT_P (XEXP (ind, 1))
12609 && INTVAL (XEXP (ind, 1)) > -1024
12610 /* For quad modes, we restrict the constant offset to be slightly less
12611 than what the instruction format permits. We have no such constraint
12612 on double mode offsets. (This must match arm_legitimate_index_p.) */
12613 && (INTVAL (XEXP (ind, 1))
12614 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12615 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12616 return TRUE;
12617
12618 return FALSE;
12619 }
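
/* A sketch of what each TYPE above accepts in addition to a plain
   (mem (reg)) (register numbers arbitrary, modes omitted):

     TYPE 0 (vldr):  (mem (post_inc (reg r4)))
                     (mem (pre_dec (reg r4)))
                     (mem (plus (reg r4) (const_int off)))
                       where -1024 < off < 1016 for quad modes
                       (off < 1024 for double modes) and off is a multiple of 4
     TYPE 1 (ldm):   no additional forms
     TYPE 2 (vld1):  (mem (post_inc (reg r4)))
                     (mem (post_modify (reg r4) (plus (reg r4) (reg r5))))  */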
12620
12621 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12622 type. */
12623 int
12624 neon_struct_mem_operand (rtx op)
12625 {
12626 rtx ind;
12627
12628 /* Reject eliminable registers. */
12629 if (! (reload_in_progress || reload_completed)
12630 && ( reg_mentioned_p (frame_pointer_rtx, op)
12631 || reg_mentioned_p (arg_pointer_rtx, op)
12632 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12633 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12634 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12635 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12636 return FALSE;
12637
12638 /* Constants are converted into offsets from labels. */
12639 if (!MEM_P (op))
12640 return FALSE;
12641
12642 ind = XEXP (op, 0);
12643
12644 if (reload_completed
12645 && (GET_CODE (ind) == LABEL_REF
12646 || (GET_CODE (ind) == CONST
12647 && GET_CODE (XEXP (ind, 0)) == PLUS
12648 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12649 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12650 return TRUE;
12651
12652 /* Match: (mem (reg)). */
12653 if (REG_P (ind))
12654 return arm_address_register_rtx_p (ind, 0);
12655
12656 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12657 if (GET_CODE (ind) == POST_INC
12658 || GET_CODE (ind) == PRE_DEC)
12659 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12660
12661 return FALSE;
12662 }
12663
12664 /* Return true if X is a register that will be eliminated later on. */
12665 int
12666 arm_eliminable_register (rtx x)
12667 {
12668 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12669 || REGNO (x) == ARG_POINTER_REGNUM
12670 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12671 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12672 }
12673
12674 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12675 coprocessor registers.  Otherwise return NO_REGS. */
12676
12677 enum reg_class
12678 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12679 {
12680 if (mode == HFmode)
12681 {
12682 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12683 return GENERAL_REGS;
12684 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12685 return NO_REGS;
12686 return GENERAL_REGS;
12687 }
12688
12689 /* The neon move patterns handle all legitimate vector and struct
12690 addresses. */
12691 if (TARGET_NEON
12692 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12693 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12694 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12695 || VALID_NEON_STRUCT_MODE (mode)))
12696 return NO_REGS;
12697
12698 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12699 return NO_REGS;
12700
12701 return GENERAL_REGS;
12702 }
12703
12704 /* Return TRUE for values which must be returned in the most-significant end
12705 of the return register. */
12706
12707 static bool
12708 arm_return_in_msb (const_tree valtype)
12709 {
12710 return (TARGET_AAPCS_BASED
12711 && BYTES_BIG_ENDIAN
12712 && (AGGREGATE_TYPE_P (valtype)
12713 || TREE_CODE (valtype) == COMPLEX_TYPE
12714 || FIXED_POINT_TYPE_P (valtype)));
12715 }
12716
12717 /* Return TRUE if X references a SYMBOL_REF. */
12718 int
12719 symbol_mentioned_p (rtx x)
12720 {
12721 const char * fmt;
12722 int i;
12723
12724 if (GET_CODE (x) == SYMBOL_REF)
12725 return 1;
12726
12727 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12728 are constant offsets, not symbols. */
12729 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12730 return 0;
12731
12732 fmt = GET_RTX_FORMAT (GET_CODE (x));
12733
12734 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12735 {
12736 if (fmt[i] == 'E')
12737 {
12738 int j;
12739
12740 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12741 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12742 return 1;
12743 }
12744 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12745 return 1;
12746 }
12747
12748 return 0;
12749 }
12750
12751 /* Return TRUE if X references a LABEL_REF. */
12752 int
12753 label_mentioned_p (rtx x)
12754 {
12755 const char * fmt;
12756 int i;
12757
12758 if (GET_CODE (x) == LABEL_REF)
12759 return 1;
12760
12761 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12762 instruction, but they are constant offsets, not symbols. */
12763 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12764 return 0;
12765
12766 fmt = GET_RTX_FORMAT (GET_CODE (x));
12767 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12768 {
12769 if (fmt[i] == 'E')
12770 {
12771 int j;
12772
12773 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12774 if (label_mentioned_p (XVECEXP (x, i, j)))
12775 return 1;
12776 }
12777 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12778 return 1;
12779 }
12780
12781 return 0;
12782 }
12783
12784 int
12785 tls_mentioned_p (rtx x)
12786 {
12787 switch (GET_CODE (x))
12788 {
12789 case CONST:
12790 return tls_mentioned_p (XEXP (x, 0));
12791
12792 case UNSPEC:
12793 if (XINT (x, 1) == UNSPEC_TLS)
12794 return 1;
12795
12796 /* Fall through. */
12797 default:
12798 return 0;
12799 }
12800 }
12801
12802 /* Must not copy any rtx that uses a pc-relative address.
12803 Also, disallow copying of load-exclusive instructions that
12804 may appear after splitting of compare-and-swap-style operations
12805 so as to prevent those loops from being transformed away from their
12806 canonical forms (see PR 69904). */
12807
12808 static bool
12809 arm_cannot_copy_insn_p (rtx_insn *insn)
12810 {
12811 /* The tls call insn cannot be copied, as it is paired with a data
12812 word. */
12813 if (recog_memoized (insn) == CODE_FOR_tlscall)
12814 return true;
12815
12816 subrtx_iterator::array_type array;
12817 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12818 {
12819 const_rtx x = *iter;
12820 if (GET_CODE (x) == UNSPEC
12821 && (XINT (x, 1) == UNSPEC_PIC_BASE
12822 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12823 return true;
12824 }
12825
12826 rtx set = single_set (insn);
12827 if (set)
12828 {
12829 rtx src = SET_SRC (set);
12830 if (GET_CODE (src) == ZERO_EXTEND)
12831 src = XEXP (src, 0);
12832
12833 /* Catch the load-exclusive and load-acquire operations. */
12834 if (GET_CODE (src) == UNSPEC_VOLATILE
12835 && (XINT (src, 1) == VUNSPEC_LL
12836 || XINT (src, 1) == VUNSPEC_LAX))
12837 return true;
12838 }
12839 return false;
12840 }
12841
12842 enum rtx_code
12843 minmax_code (rtx x)
12844 {
12845 enum rtx_code code = GET_CODE (x);
12846
12847 switch (code)
12848 {
12849 case SMAX:
12850 return GE;
12851 case SMIN:
12852 return LE;
12853 case UMIN:
12854 return LEU;
12855 case UMAX:
12856 return GEU;
12857 default:
12858 gcc_unreachable ();
12859 }
12860 }
12861
12862 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
12863
12864 bool
12865 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12866 int *mask, bool *signed_sat)
12867 {
12868 /* The high bound must be a power of two minus one. */
12869 int log = exact_log2 (INTVAL (hi_bound) + 1);
12870 if (log == -1)
12871 return false;
12872
12873 /* The low bound is either zero (for usat) or one less than the
12874 negation of the high bound (for ssat). */
12875 if (INTVAL (lo_bound) == 0)
12876 {
12877 if (mask)
12878 *mask = log;
12879 if (signed_sat)
12880 *signed_sat = false;
12881
12882 return true;
12883 }
12884
12885 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12886 {
12887 if (mask)
12888 *mask = log + 1;
12889 if (signed_sat)
12890 *signed_sat = true;
12891
12892 return true;
12893 }
12894
12895 return false;
12896 }
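
/* Worked examples of the matching above:

     lo_bound = 0,    hi_bound = 255:  log = 8, *mask = 8, *signed_sat = false
                                       (the usat #8 case, range [0, 255])
     lo_bound = -256, hi_bound = 255:  log = 8, lo_bound == -hi_bound - 1,
                                       *mask = 9, *signed_sat = true
                                       (the ssat #9 case, range [-256, 255])
     lo_bound = -100, hi_bound = 255:  no match; the low bound is neither zero
                                       nor -hi_bound - 1.  */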
12897
12898 /* Return 1 if memory locations are adjacent. */
12899 int
12900 adjacent_mem_locations (rtx a, rtx b)
12901 {
12902 /* We don't guarantee to preserve the order of these memory refs. */
12903 if (volatile_refs_p (a) || volatile_refs_p (b))
12904 return 0;
12905
12906 if ((REG_P (XEXP (a, 0))
12907 || (GET_CODE (XEXP (a, 0)) == PLUS
12908 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12909 && (REG_P (XEXP (b, 0))
12910 || (GET_CODE (XEXP (b, 0)) == PLUS
12911 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12912 {
12913 HOST_WIDE_INT val0 = 0, val1 = 0;
12914 rtx reg0, reg1;
12915 int val_diff;
12916
12917 if (GET_CODE (XEXP (a, 0)) == PLUS)
12918 {
12919 reg0 = XEXP (XEXP (a, 0), 0);
12920 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12921 }
12922 else
12923 reg0 = XEXP (a, 0);
12924
12925 if (GET_CODE (XEXP (b, 0)) == PLUS)
12926 {
12927 reg1 = XEXP (XEXP (b, 0), 0);
12928 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12929 }
12930 else
12931 reg1 = XEXP (b, 0);
12932
12933 /* Don't accept any offset that will require multiple
12934 instructions to handle, since this would cause the
12935 arith_adjacentmem pattern to output an overlong sequence. */
12936 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12937 return 0;
12938
12939 /* Don't allow an eliminable register: register elimination can make
12940 the offset too large. */
12941 if (arm_eliminable_register (reg0))
12942 return 0;
12943
12944 val_diff = val1 - val0;
12945
12946 if (arm_ld_sched)
12947 {
12948 /* If the target has load delay slots, then there's no benefit
12949 to using an ldm instruction unless the offset is zero and
12950 we are optimizing for size. */
12951 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12952 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12953 && (val_diff == 4 || val_diff == -4));
12954 }
12955
12956 return ((REGNO (reg0) == REGNO (reg1))
12957 && (val_diff == 4 || val_diff == -4));
12958 }
12959
12960 return 0;
12961 }
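
/* For example, (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent: same base register and
   offsets differing by exactly 4.  Offsets 8 and 16 are not.  When
   arm_ld_sched is set, the pair is only accepted when optimizing for size
   and one of the offsets is 0 or 4.  */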
12962
12963 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12964 for load operations, false for store operations. CONSECUTIVE is true
12965 if the register numbers in the operation must be consecutive in the register
12966 bank. RETURN_PC is true if the value is to be loaded into the PC.
12967 The pattern we are trying to match for load is:
12968 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12969 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12970 :
12971 :
12972 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12973 ]
12974 where
12975 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12976 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12977 3. If consecutive is TRUE, then for kth register being loaded,
12978 REGNO (R_dk) = REGNO (R_d0) + k.
12979 The pattern for store is similar. */
12980 bool
12981 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12982 bool consecutive, bool return_pc)
12983 {
12984 HOST_WIDE_INT count = XVECLEN (op, 0);
12985 rtx reg, mem, addr;
12986 unsigned regno;
12987 unsigned first_regno;
12988 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12989 rtx elt;
12990 bool addr_reg_in_reglist = false;
12991 bool update = false;
12992 int reg_increment;
12993 int offset_adj;
12994 int regs_per_val;
12995
12996 /* If not in SImode, then registers must be consecutive
12997 (e.g., VLDM instructions for DFmode). */
12998 gcc_assert ((mode == SImode) || consecutive);
12999 /* Setting return_pc for stores is illegal. */
13000 gcc_assert (!return_pc || load);
13001
13002 /* Set up the increments and the regs per val based on the mode. */
13003 reg_increment = GET_MODE_SIZE (mode);
13004 regs_per_val = reg_increment / 4;
13005 offset_adj = return_pc ? 1 : 0;
13006
13007 if (count <= 1
13008 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13009 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13010 return false;
13011
13012 /* Check if this is a write-back. */
13013 elt = XVECEXP (op, 0, offset_adj);
13014 if (GET_CODE (SET_SRC (elt)) == PLUS)
13015 {
13016 i++;
13017 base = 1;
13018 update = true;
13019
13020 /* The offset adjustment must be the number of registers being
13021 popped times the size of a single register. */
13022 if (!REG_P (SET_DEST (elt))
13023 || !REG_P (XEXP (SET_SRC (elt), 0))
13024 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13025 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13026 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13027 ((count - 1 - offset_adj) * reg_increment))
13028 return false;
13029 }
13030
13031 i = i + offset_adj;
13032 base = base + offset_adj;
13033 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13034 success depends on the type: VLDM can do just one reg,
13035 LDM must do at least two. */
13036 if ((count <= i) && (mode == SImode))
13037 return false;
13038
13039 elt = XVECEXP (op, 0, i - 1);
13040 if (GET_CODE (elt) != SET)
13041 return false;
13042
13043 if (load)
13044 {
13045 reg = SET_DEST (elt);
13046 mem = SET_SRC (elt);
13047 }
13048 else
13049 {
13050 reg = SET_SRC (elt);
13051 mem = SET_DEST (elt);
13052 }
13053
13054 if (!REG_P (reg) || !MEM_P (mem))
13055 return false;
13056
13057 regno = REGNO (reg);
13058 first_regno = regno;
13059 addr = XEXP (mem, 0);
13060 if (GET_CODE (addr) == PLUS)
13061 {
13062 if (!CONST_INT_P (XEXP (addr, 1)))
13063 return false;
13064
13065 offset = INTVAL (XEXP (addr, 1));
13066 addr = XEXP (addr, 0);
13067 }
13068
13069 if (!REG_P (addr))
13070 return false;
13071
13072 /* Don't allow SP to be loaded unless it is also the base register. It
13073 guarantees that SP is reset correctly when an LDM instruction
13074 is interrupted. Otherwise, we might end up with a corrupt stack. */
13075 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13076 return false;
13077
13078 for (; i < count; i++)
13079 {
13080 elt = XVECEXP (op, 0, i);
13081 if (GET_CODE (elt) != SET)
13082 return false;
13083
13084 if (load)
13085 {
13086 reg = SET_DEST (elt);
13087 mem = SET_SRC (elt);
13088 }
13089 else
13090 {
13091 reg = SET_SRC (elt);
13092 mem = SET_DEST (elt);
13093 }
13094
13095 if (!REG_P (reg)
13096 || GET_MODE (reg) != mode
13097 || REGNO (reg) <= regno
13098 || (consecutive
13099 && (REGNO (reg) !=
13100 (unsigned int) (first_regno + regs_per_val * (i - base))))
13101 /* Don't allow SP to be loaded unless it is also the base register. It
13102 guarantees that SP is reset correctly when an LDM instruction
13103 is interrupted. Otherwise, we might end up with a corrupt stack. */
13104 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13105 || !MEM_P (mem)
13106 || GET_MODE (mem) != mode
13107 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13108 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13109 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13110 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13111 offset + (i - base) * reg_increment))
13112 && (!REG_P (XEXP (mem, 0))
13113 || offset + (i - base) * reg_increment != 0)))
13114 return false;
13115
13116 regno = REGNO (reg);
13117 if (regno == REGNO (addr))
13118 addr_reg_in_reglist = true;
13119 }
13120
13121 if (load)
13122 {
13123 if (update && addr_reg_in_reglist)
13124 return false;
13125
13126 /* For Thumb-1, the address register is always modified, either by write-back
13127 or by an explicit load. If the pattern does not describe an update,
13128 then the address register must be in the list of loaded registers. */
13129 if (TARGET_THUMB1)
13130 return update || addr_reg_in_reglist;
13131 }
13132
13133 return true;
13134 }
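
/* As an illustration, a three-register pop with update (an LDM with
   write-back) reaches this function as a PARALLEL of the form

     [(set (reg sp) (plus (reg sp) (const_int 12)))
      (set (reg r4) (mem (reg sp)))
      (set (reg r5) (mem (plus (reg sp) (const_int 4))))
      (set (reg r6) (mem (plus (reg sp) (const_int 8))))]

   (modes omitted).  COUNT is 4, the first element is recognized as the
   write-back since 12 == (4 - 1) * 4, and r4 < r5 < r6 with offsets
   increasing by the register size, so the function returns true.  */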
13135
13136 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13137 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13138 instruction. ADD_OFFSET is nonzero if the base address register needs
13139 to be modified with an add instruction before we can use it. */
13140
13141 static bool
13142 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13143 int nops, HOST_WIDE_INT add_offset)
13144 {
13145 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13146 if the offset isn't small enough. The reason 2 ldrs are faster
13147 is because these ARMs are able to do more than one cache access
13148 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13149 whilst the ARM8 has a double bandwidth cache. This means that
13150 these cores can do both an instruction fetch and a data fetch in
13151 a single cycle, so the trick of calculating the address into a
13152 scratch register (one of the result regs) and then doing a load
13153 multiple actually becomes slower (and no smaller in code size).
13154 That is the transformation
13155
13156 ldr rd1, [rbase + offset]
13157 ldr rd2, [rbase + offset + 4]
13158
13159 to
13160
13161 add rd1, rbase, offset
13162 ldmia rd1, {rd1, rd2}
13163
13164 produces worse code -- '3 cycles + any stalls on rd2' instead of
13165 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13166 access per cycle, the first sequence could never complete in less
13167 than 6 cycles, whereas the ldm sequence would only take 5 and
13168 would make better use of sequential accesses if not hitting the
13169 cache.
13170
13171 We cheat here and test 'arm_ld_sched' which we currently know to
13172 only be true for the ARM8, ARM9 and StrongARM. If this ever
13173 changes, then the test below needs to be reworked. */
13174 if (nops == 2 && arm_ld_sched && add_offset != 0)
13175 return false;
13176
13177 /* XScale has load-store double instructions, but they have stricter
13178 alignment requirements than load-store multiple, so we cannot
13179 use them.
13180
13181 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13182 the pipeline until completion.
13183
13184 NREGS CYCLES
13185 1 3
13186 2 4
13187 3 5
13188 4 6
13189
13190 An ldr instruction takes 1-3 cycles, but does not block the
13191 pipeline.
13192
13193 NREGS CYCLES
13194 1 1-3
13195 2 2-6
13196 3 3-9
13197 4 4-12
13198
13199 Best case ldr will always win. However, the more ldr instructions
13200 we issue, the less likely we are to be able to schedule them well.
13201 Using ldr instructions also increases code size.
13202
13203 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13204 for counts of 3 or 4 regs. */
13205 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13206 return false;
13207 return true;
13208 }
13209
13210 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13211 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13212 an array ORDER which describes the sequence to use when accessing the
13213 offsets that produces an ascending order. In this sequence, each
13214 offset must be larger by exactly 4 than the previous one. ORDER[0]
13215 must have been filled in with the lowest offset by the caller.
13216 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13217 we use to verify that ORDER produces an ascending order of registers.
13218 Return true if it was possible to construct such an order, false if
13219 not. */
13220
13221 static bool
13222 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13223 int *unsorted_regs)
13224 {
13225 int i;
13226 for (i = 1; i < nops; i++)
13227 {
13228 int j;
13229
13230 order[i] = order[i - 1];
13231 for (j = 0; j < nops; j++)
13232 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13233 {
13234 /* We must find exactly one offset that is higher than the
13235 previous one by 4. */
13236 if (order[i] != order[i - 1])
13237 return false;
13238 order[i] = j;
13239 }
13240 if (order[i] == order[i - 1])
13241 return false;
13242 /* The register numbers must be ascending. */
13243 if (unsorted_regs != NULL
13244 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13245 return false;
13246 }
13247 return true;
13248 }
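
/* A worked example of the ordering above: with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 0, 4, 12}, the caller fills in ORDER[0] == 1 (the
   index of the lowest offset); the loop then finds offsets 4, 8 and 12 in
   turn, giving ORDER == {1, 2, 0, 3}.  If some step cannot find an offset
   exactly 4 larger than the previous one (e.g. offsets {0, 4, 12}), the
   function returns false.  */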
13249
13250 /* Used to determine in a peephole whether a sequence of load
13251 instructions can be changed into a load-multiple instruction.
13252 NOPS is the number of separate load instructions we are examining. The
13253 first NOPS entries in OPERANDS are the destination registers, the
13254 next NOPS entries are memory operands. If this function is
13255 successful, *BASE is set to the common base register of the memory
13256 accesses; *LOAD_OFFSET is set to the first memory location's offset
13257 from that base register.
13258 REGS is an array filled in with the destination register numbers.
13259 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13260 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13261 the sequence of registers in REGS matches the loads from ascending memory
13262 locations, and the function verifies that the register numbers are
13263 themselves ascending. If CHECK_REGS is false, the register numbers
13264 are stored in the order they are found in the operands. */
13265 static int
13266 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13267 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13268 {
13269 int unsorted_regs[MAX_LDM_STM_OPS];
13270 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13271 int order[MAX_LDM_STM_OPS];
13272 rtx base_reg_rtx = NULL;
13273 int base_reg = -1;
13274 int i, ldm_case;
13275
13276 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13277 easily extended if required. */
13278 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13279
13280 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13281
13282 /* Loop over the operands and check that the memory references are
13283 suitable (i.e. immediate offsets from the same base register). At
13284 the same time, extract the target register, and the memory
13285 offsets. */
13286 for (i = 0; i < nops; i++)
13287 {
13288 rtx reg;
13289 rtx offset;
13290
13291 /* Convert a subreg of a mem into the mem itself. */
13292 if (GET_CODE (operands[nops + i]) == SUBREG)
13293 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13294
13295 gcc_assert (MEM_P (operands[nops + i]));
13296
13297 /* Don't reorder volatile memory references; it doesn't seem worth
13298 looking for the case where the order is ok anyway. */
13299 if (MEM_VOLATILE_P (operands[nops + i]))
13300 return 0;
13301
13302 offset = const0_rtx;
13303
13304 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13305 || (GET_CODE (reg) == SUBREG
13306 && REG_P (reg = SUBREG_REG (reg))))
13307 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13308 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13309 || (GET_CODE (reg) == SUBREG
13310 && REG_P (reg = SUBREG_REG (reg))))
13311 && (CONST_INT_P (offset
13312 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13313 {
13314 if (i == 0)
13315 {
13316 base_reg = REGNO (reg);
13317 base_reg_rtx = reg;
13318 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13319 return 0;
13320 }
13321 else if (base_reg != (int) REGNO (reg))
13322 /* Not addressed from the same base register. */
13323 return 0;
13324
13325 unsorted_regs[i] = (REG_P (operands[i])
13326 ? REGNO (operands[i])
13327 : REGNO (SUBREG_REG (operands[i])));
13328
13329 /* If it isn't an integer register, or if it overwrites the
13330 base register but isn't the last insn in the list, then
13331 we can't do this. */
13332 if (unsorted_regs[i] < 0
13333 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13334 || unsorted_regs[i] > 14
13335 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13336 return 0;
13337
13338 /* Don't allow SP to be loaded unless it is also the base
13339 register. It guarantees that SP is reset correctly when
13340 an LDM instruction is interrupted. Otherwise, we might
13341 end up with a corrupt stack. */
13342 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13343 return 0;
13344
13345 unsorted_offsets[i] = INTVAL (offset);
13346 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13347 order[0] = i;
13348 }
13349 else
13350 /* Not a suitable memory address. */
13351 return 0;
13352 }
13353
13354 /* All the useful information has now been extracted from the
13355 operands into unsorted_regs and unsorted_offsets; additionally,
13356 order[0] has been set to the lowest offset in the list. Sort
13357 the offsets into order, verifying that they are adjacent, and
13358 check that the register numbers are ascending. */
13359 if (!compute_offset_order (nops, unsorted_offsets, order,
13360 check_regs ? unsorted_regs : NULL))
13361 return 0;
13362
13363 if (saved_order)
13364 memcpy (saved_order, order, sizeof order);
13365
13366 if (base)
13367 {
13368 *base = base_reg;
13369
13370 for (i = 0; i < nops; i++)
13371 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13372
13373 *load_offset = unsorted_offsets[order[0]];
13374 }
13375
13376 if (TARGET_THUMB1
13377 && !peep2_reg_dead_p (nops, base_reg_rtx))
13378 return 0;
13379
13380 if (unsorted_offsets[order[0]] == 0)
13381 ldm_case = 1; /* ldmia */
13382 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13383 ldm_case = 2; /* ldmib */
13384 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13385 ldm_case = 3; /* ldmda */
13386 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13387 ldm_case = 4; /* ldmdb */
13388 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13389 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13390 ldm_case = 5;
13391 else
13392 return 0;
13393
13394 if (!multiple_operation_profitable_p (false, nops,
13395 ldm_case == 5
13396 ? unsorted_offsets[order[0]] : 0))
13397 return 0;
13398
13399 return ldm_case;
13400 }
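
/* For example, three loads of r0, r1 and r2 from [r4], [r4, #4] and
   [r4, #8] yield REGS == {0, 1, 2}, *BASE == 4, *LOAD_OFFSET == 0 and a
   return value of 1 (ldmia).  Had the lowest offset been 4 the result would
   be 2 (ldmib, ARM state only); a larger but ARM-encodable lowest offset
   such as 256 gives case 5, where the caller must first add the offset to
   the base register.  */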
13401
13402 /* Used to determine in a peephole whether a sequence of store instructions can
13403 be changed into a store-multiple instruction.
13404 NOPS is the number of separate store instructions we are examining.
13405 NOPS_TOTAL is the total number of instructions recognized by the peephole
13406 pattern.
13407 The first NOPS entries in OPERANDS are the source registers, the next
13408 NOPS entries are memory operands. If this function is successful, *BASE is
13409 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13410 to the first memory location's offset from that base register. REGS is an
13411 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13412 likewise filled with the corresponding rtx's.
13413 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13414 numbers to an ascending order of stores.
13415 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13416 from ascending memory locations, and the function verifies that the register
13417 numbers are themselves ascending. If CHECK_REGS is false, the register
13418 numbers are stored in the order they are found in the operands. */
13419 static int
13420 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13421 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13422 HOST_WIDE_INT *load_offset, bool check_regs)
13423 {
13424 int unsorted_regs[MAX_LDM_STM_OPS];
13425 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13426 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13427 int order[MAX_LDM_STM_OPS];
13428 int base_reg = -1;
13429 rtx base_reg_rtx = NULL;
13430 int i, stm_case;
13431
13432 /* Write-back of the base register is currently only supported for Thumb-1. */
13433 int base_writeback = TARGET_THUMB1;
13434
13435 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13436 easily extended if required. */
13437 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13438
13439 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13440
13441 /* Loop over the operands and check that the memory references are
13442 suitable (i.e. immediate offsets from the same base register). At
13443 the same time, extract the source registers and the memory
13444 offsets. */
13445 for (i = 0; i < nops; i++)
13446 {
13447 rtx reg;
13448 rtx offset;
13449
13450 /* Convert a subreg of a mem into the mem itself. */
13451 if (GET_CODE (operands[nops + i]) == SUBREG)
13452 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13453
13454 gcc_assert (MEM_P (operands[nops + i]));
13455
13456 /* Don't reorder volatile memory references; it doesn't seem worth
13457 looking for the case where the order is ok anyway. */
13458 if (MEM_VOLATILE_P (operands[nops + i]))
13459 return 0;
13460
13461 offset = const0_rtx;
13462
13463 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13464 || (GET_CODE (reg) == SUBREG
13465 && REG_P (reg = SUBREG_REG (reg))))
13466 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13467 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13468 || (GET_CODE (reg) == SUBREG
13469 && REG_P (reg = SUBREG_REG (reg))))
13470 && (CONST_INT_P (offset
13471 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13472 {
13473 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13474 ? operands[i] : SUBREG_REG (operands[i]));
13475 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13476
13477 if (i == 0)
13478 {
13479 base_reg = REGNO (reg);
13480 base_reg_rtx = reg;
13481 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13482 return 0;
13483 }
13484 else if (base_reg != (int) REGNO (reg))
13485 /* Not addressed from the same base register. */
13486 return 0;
13487
13488 /* If it isn't an integer register, then we can't do this. */
13489 if (unsorted_regs[i] < 0
13490 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13491 /* The effects are unpredictable if the base register is
13492 both updated and stored. */
13493 || (base_writeback && unsorted_regs[i] == base_reg)
13494 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13495 || unsorted_regs[i] > 14)
13496 return 0;
13497
13498 unsorted_offsets[i] = INTVAL (offset);
13499 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13500 order[0] = i;
13501 }
13502 else
13503 /* Not a suitable memory address. */
13504 return 0;
13505 }
13506
13507 /* All the useful information has now been extracted from the
13508 operands into unsorted_regs and unsorted_offsets; additionally,
13509 order[0] has been set to the lowest offset in the list. Sort
13510 the offsets into order, verifying that they are adjacent, and
13511 check that the register numbers are ascending. */
13512 if (!compute_offset_order (nops, unsorted_offsets, order,
13513 check_regs ? unsorted_regs : NULL))
13514 return 0;
13515
13516 if (saved_order)
13517 memcpy (saved_order, order, sizeof order);
13518
13519 if (base)
13520 {
13521 *base = base_reg;
13522
13523 for (i = 0; i < nops; i++)
13524 {
13525 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13526 if (reg_rtxs)
13527 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13528 }
13529
13530 *load_offset = unsorted_offsets[order[0]];
13531 }
13532
13533 if (TARGET_THUMB1
13534 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13535 return 0;
13536
13537 if (unsorted_offsets[order[0]] == 0)
13538 stm_case = 1; /* stmia */
13539 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13540 stm_case = 2; /* stmib */
13541 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13542 stm_case = 3; /* stmda */
13543 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13544 stm_case = 4; /* stmdb */
13545 else
13546 return 0;
13547
13548 if (!multiple_operation_profitable_p (false, nops, 0))
13549 return 0;
13550
13551 return stm_case;
13552 }
13553 \f
13554 /* Routines for use in generating RTL. */
13555
13556 /* Generate a load-multiple instruction. COUNT is the number of loads in
13557 the instruction; REGS and MEMS are arrays containing the operands.
13558 BASEREG is the base register to be used in addressing the memory operands.
13559 WBACK_OFFSET is nonzero if the instruction should update the base
13560 register. */
13561
13562 static rtx
13563 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13564 HOST_WIDE_INT wback_offset)
13565 {
13566 int i = 0, j;
13567 rtx result;
13568
13569 if (!multiple_operation_profitable_p (false, count, 0))
13570 {
13571 rtx seq;
13572
13573 start_sequence ();
13574
13575 for (i = 0; i < count; i++)
13576 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13577
13578 if (wback_offset != 0)
13579 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13580
13581 seq = get_insns ();
13582 end_sequence ();
13583
13584 return seq;
13585 }
13586
13587 result = gen_rtx_PARALLEL (VOIDmode,
13588 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13589 if (wback_offset != 0)
13590 {
13591 XVECEXP (result, 0, 0)
13592 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13593 i = 1;
13594 count++;
13595 }
13596
13597 for (j = 0; i < count; i++, j++)
13598 XVECEXP (result, 0, i)
13599 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13600
13601 return result;
13602 }
13603
13604 /* Generate a store-multiple instruction. COUNT is the number of stores in
13605 the instruction; REGS and MEMS are arrays containing the operands.
13606 BASEREG is the base register to be used in addressing the memory operands.
13607 WBACK_OFFSET is nonzero if the instruction should update the base
13608 register. */
13609
13610 static rtx
13611 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13612 HOST_WIDE_INT wback_offset)
13613 {
13614 int i = 0, j;
13615 rtx result;
13616
13617 if (GET_CODE (basereg) == PLUS)
13618 basereg = XEXP (basereg, 0);
13619
13620 if (!multiple_operation_profitable_p (false, count, 0))
13621 {
13622 rtx seq;
13623
13624 start_sequence ();
13625
13626 for (i = 0; i < count; i++)
13627 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13628
13629 if (wback_offset != 0)
13630 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13631
13632 seq = get_insns ();
13633 end_sequence ();
13634
13635 return seq;
13636 }
13637
13638 result = gen_rtx_PARALLEL (VOIDmode,
13639 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13640 if (wback_offset != 0)
13641 {
13642 XVECEXP (result, 0, 0)
13643 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13644 i = 1;
13645 count++;
13646 }
13647
13648 for (j = 0; i < count; i++, j++)
13649 XVECEXP (result, 0, i)
13650 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13651
13652 return result;
13653 }
13654
13655 /* Generate either a load-multiple or a store-multiple instruction. This
13656 function can be used in situations where we can start with a single MEM
13657 rtx and adjust its address upwards.
13658 COUNT is the number of operations in the instruction, not counting a
13659 possible update of the base register. REGS is an array containing the
13660 register operands.
13661 BASEREG is the base register to be used in addressing the memory operands,
13662 which are constructed from BASEMEM.
13663 WRITE_BACK specifies whether the generated instruction should include an
13664 update of the base register.
13665 OFFSETP is used to pass an offset to and from this function; this offset
13666 is not used when constructing the address (instead BASEMEM should have an
13667 appropriate offset in its address); it is used only for setting
13668 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13669
13670 static rtx
13671 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13672 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13673 {
13674 rtx mems[MAX_LDM_STM_OPS];
13675 HOST_WIDE_INT offset = *offsetp;
13676 int i;
13677
13678 gcc_assert (count <= MAX_LDM_STM_OPS);
13679
13680 if (GET_CODE (basereg) == PLUS)
13681 basereg = XEXP (basereg, 0);
13682
13683 for (i = 0; i < count; i++)
13684 {
13685 rtx addr = plus_constant (Pmode, basereg, i * 4);
13686 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13687 offset += 4;
13688 }
13689
13690 if (write_back)
13691 *offsetp = offset;
13692
13693 if (is_load)
13694 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13695 write_back ? 4 * count : 0);
13696 else
13697 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13698 write_back ? 4 * count : 0);
13699 }
13700
13701 rtx
13702 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13703 rtx basemem, HOST_WIDE_INT *offsetp)
13704 {
13705 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13706 offsetp);
13707 }
13708
13709 rtx
13710 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13711 rtx basemem, HOST_WIDE_INT *offsetp)
13712 {
13713 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13714 offsetp);
13715 }
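
/* An illustrative use of the generators above (a sketch; BASEMEM is assumed
   to describe word-aligned memory whose address is already in r4):

     int regs[3] = { 0, 1, 2 };
     HOST_WIDE_INT offset = 0;
     rtx base = gen_rtx_REG (SImode, 4);
     emit_insn (arm_gen_load_multiple (regs, 3, base, FALSE, basemem,
                                       &offset));

   This emits a single ldmia r4, {r0, r1, r2}, or, where
   multiple_operation_profitable_p decides an LDM is not worthwhile, the
   equivalent sequence of three ldr instructions.  */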
13716
13717 /* Called from a peephole2 expander to turn a sequence of loads into an
13718 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13719 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13720 is true if we can reorder the registers because their values are
13721 subsequently used commutatively.
13722 Returns true iff we could generate a new instruction. */
13723
13724 bool
13725 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13726 {
13727 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13728 rtx mems[MAX_LDM_STM_OPS];
13729 int i, j, base_reg;
13730 rtx base_reg_rtx;
13731 HOST_WIDE_INT offset;
13732 int write_back = FALSE;
13733 int ldm_case;
13734 rtx addr;
13735
13736 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13737 &base_reg, &offset, !sort_regs);
13738
13739 if (ldm_case == 0)
13740 return false;
13741
13742 if (sort_regs)
13743 for (i = 0; i < nops - 1; i++)
13744 for (j = i + 1; j < nops; j++)
13745 if (regs[i] > regs[j])
13746 {
13747 int t = regs[i];
13748 regs[i] = regs[j];
13749 regs[j] = t;
13750 }
13751 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13752
13753 if (TARGET_THUMB1)
13754 {
13755 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13756 gcc_assert (ldm_case == 1 || ldm_case == 5);
13757 write_back = TRUE;
13758 }
13759
13760 if (ldm_case == 5)
13761 {
13762 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13763 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13764 offset = 0;
13765 if (!TARGET_THUMB1)
13766 base_reg_rtx = newbase;
13767 }
13768
13769 for (i = 0; i < nops; i++)
13770 {
13771 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13772 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13773 SImode, addr, 0);
13774 }
13775 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13776 write_back ? offset + i * 4 : 0));
13777 return true;
13778 }
13779
13780 /* Called from a peephole2 expander to turn a sequence of stores into an
13781 STM instruction. OPERANDS are the operands found by the peephole matcher;
13782 NOPS indicates how many separate stores we are trying to combine.
13783 Returns true iff we could generate a new instruction. */
13784
13785 bool
13786 gen_stm_seq (rtx *operands, int nops)
13787 {
13788 int i;
13789 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13790 rtx mems[MAX_LDM_STM_OPS];
13791 int base_reg;
13792 rtx base_reg_rtx;
13793 HOST_WIDE_INT offset;
13794 int write_back = FALSE;
13795 int stm_case;
13796 rtx addr;
13797 bool base_reg_dies;
13798
13799 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13800 mem_order, &base_reg, &offset, true);
13801
13802 if (stm_case == 0)
13803 return false;
13804
13805 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13806
13807 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13808 if (TARGET_THUMB1)
13809 {
13810 gcc_assert (base_reg_dies);
13811 write_back = TRUE;
13812 }
13813
13814 if (stm_case == 5)
13815 {
13816 gcc_assert (base_reg_dies);
13817 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13818 offset = 0;
13819 }
13820
13821 addr = plus_constant (Pmode, base_reg_rtx, offset);
13822
13823 for (i = 0; i < nops; i++)
13824 {
13825 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13826 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13827 SImode, addr, 0);
13828 }
13829 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13830 write_back ? offset + i * 4 : 0));
13831 return true;
13832 }
13833
13834 /* Called from a peephole2 expander to turn a sequence of stores that are
13835 preceded by constant loads into an STM instruction. OPERANDS are the
13836 operands found by the peephole matcher; NOPS indicates how many
13837 separate stores we are trying to combine; there are 2 * NOPS
13838 instructions in the peephole.
13839 Returns true iff we could generate a new instruction. */
13840
13841 bool
13842 gen_const_stm_seq (rtx *operands, int nops)
13843 {
13844 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13845 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13846 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13847 rtx mems[MAX_LDM_STM_OPS];
13848 int base_reg;
13849 rtx base_reg_rtx;
13850 HOST_WIDE_INT offset;
13851 int write_back = FALSE;
13852 int stm_case;
13853 rtx addr;
13854 bool base_reg_dies;
13855 int i, j;
13856 HARD_REG_SET allocated;
13857
13858 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13859 mem_order, &base_reg, &offset, false);
13860
13861 if (stm_case == 0)
13862 return false;
13863
13864 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13865
13866 /* If the same register is used more than once, try to find a free
13867 register. */
13868 CLEAR_HARD_REG_SET (allocated);
13869 for (i = 0; i < nops; i++)
13870 {
13871 for (j = i + 1; j < nops; j++)
13872 if (regs[i] == regs[j])
13873 {
13874 rtx t = peep2_find_free_register (0, nops * 2,
13875 TARGET_THUMB1 ? "l" : "r",
13876 SImode, &allocated);
13877 if (t == NULL_RTX)
13878 return false;
13879 reg_rtxs[i] = t;
13880 regs[i] = REGNO (t);
13881 }
13882 }
13883
13884 /* Compute an ordering that maps the register numbers to an ascending
13885 sequence. */
13886 reg_order[0] = 0;
13887 for (i = 0; i < nops; i++)
13888 if (regs[i] < regs[reg_order[0]])
13889 reg_order[0] = i;
13890
13891 for (i = 1; i < nops; i++)
13892 {
13893 int this_order = reg_order[i - 1];
13894 for (j = 0; j < nops; j++)
13895 if (regs[j] > regs[reg_order[i - 1]]
13896 && (this_order == reg_order[i - 1]
13897 || regs[j] < regs[this_order]))
13898 this_order = j;
13899 reg_order[i] = this_order;
13900 }
13901
13902 /* Ensure that registers that must be live after the instruction end
13903 up with the correct value. */
13904 for (i = 0; i < nops; i++)
13905 {
13906 int this_order = reg_order[i];
13907 if ((this_order != mem_order[i]
13908 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13909 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13910 return false;
13911 }
13912
13913 /* Load the constants. */
13914 for (i = 0; i < nops; i++)
13915 {
13916 rtx op = operands[2 * nops + mem_order[i]];
13917 sorted_regs[i] = regs[reg_order[i]];
13918 emit_move_insn (reg_rtxs[reg_order[i]], op);
13919 }
13920
13921 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13922
13923 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13924 if (TARGET_THUMB1)
13925 {
13926 gcc_assert (base_reg_dies);
13927 write_back = TRUE;
13928 }
13929
13930 if (stm_case == 5)
13931 {
13932 gcc_assert (base_reg_dies);
13933 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13934 offset = 0;
13935 }
13936
13937 addr = plus_constant (Pmode, base_reg_rtx, offset);
13938
13939 for (i = 0; i < nops; i++)
13940 {
13941 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13942 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13943 SImode, addr, 0);
13944 }
13945 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13946 write_back ? offset + i * 4 : 0));
13947 return true;
13948 }
13949
13950 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13951 unaligned copies on processors which support unaligned semantics for those
13952 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13953 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13954 An interleave factor of 1 (the minimum) will perform no interleaving.
13955 Load/store multiple are used for aligned addresses where possible. */
13956
13957 static void
13958 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13959 HOST_WIDE_INT length,
13960 unsigned int interleave_factor)
13961 {
13962 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13963 int *regnos = XALLOCAVEC (int, interleave_factor);
13964 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13965 HOST_WIDE_INT i, j;
13966 HOST_WIDE_INT remaining = length, words;
13967 rtx halfword_tmp = NULL, byte_tmp = NULL;
13968 rtx dst, src;
13969 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13970 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13971 HOST_WIDE_INT srcoffset, dstoffset;
13972 HOST_WIDE_INT src_autoinc, dst_autoinc;
13973 rtx mem, addr;
13974
13975 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13976
13977 /* Use hard registers if we have aligned source or destination so we can use
13978 load/store multiple with contiguous registers. */
13979 if (dst_aligned || src_aligned)
13980 for (i = 0; i < interleave_factor; i++)
13981 regs[i] = gen_rtx_REG (SImode, i);
13982 else
13983 for (i = 0; i < interleave_factor; i++)
13984 regs[i] = gen_reg_rtx (SImode);
13985
13986 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13987 src = copy_addr_to_reg (XEXP (srcbase, 0));
13988
13989 srcoffset = dstoffset = 0;
13990
13991 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13992 For copying the last bytes we want to subtract this offset again. */
13993 src_autoinc = dst_autoinc = 0;
13994
13995 for (i = 0; i < interleave_factor; i++)
13996 regnos[i] = i;
13997
13998 /* Copy BLOCK_SIZE_BYTES chunks. */
13999
14000 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14001 {
14002 /* Load words. */
14003 if (src_aligned && interleave_factor > 1)
14004 {
14005 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14006 TRUE, srcbase, &srcoffset));
14007 src_autoinc += UNITS_PER_WORD * interleave_factor;
14008 }
14009 else
14010 {
14011 for (j = 0; j < interleave_factor; j++)
14012 {
14013 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14014 - src_autoinc));
14015 mem = adjust_automodify_address (srcbase, SImode, addr,
14016 srcoffset + j * UNITS_PER_WORD);
14017 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14018 }
14019 srcoffset += block_size_bytes;
14020 }
14021
14022 /* Store words. */
14023 if (dst_aligned && interleave_factor > 1)
14024 {
14025 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14026 TRUE, dstbase, &dstoffset));
14027 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14028 }
14029 else
14030 {
14031 for (j = 0; j < interleave_factor; j++)
14032 {
14033 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14034 - dst_autoinc));
14035 mem = adjust_automodify_address (dstbase, SImode, addr,
14036 dstoffset + j * UNITS_PER_WORD);
14037 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14038 }
14039 dstoffset += block_size_bytes;
14040 }
14041
14042 remaining -= block_size_bytes;
14043 }
14044
14045 /* Copy any whole words left (note these aren't interleaved with any
14046 subsequent halfword/byte load/stores in the interests of simplicity). */
14047
14048 words = remaining / UNITS_PER_WORD;
14049
14050 gcc_assert (words < interleave_factor);
14051
14052 if (src_aligned && words > 1)
14053 {
14054 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14055 &srcoffset));
14056 src_autoinc += UNITS_PER_WORD * words;
14057 }
14058 else
14059 {
14060 for (j = 0; j < words; j++)
14061 {
14062 addr = plus_constant (Pmode, src,
14063 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14064 mem = adjust_automodify_address (srcbase, SImode, addr,
14065 srcoffset + j * UNITS_PER_WORD);
14066 if (src_aligned)
14067 emit_move_insn (regs[j], mem);
14068 else
14069 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14070 }
14071 srcoffset += words * UNITS_PER_WORD;
14072 }
14073
14074 if (dst_aligned && words > 1)
14075 {
14076 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14077 &dstoffset));
14078 dst_autoinc += words * UNITS_PER_WORD;
14079 }
14080 else
14081 {
14082 for (j = 0; j < words; j++)
14083 {
14084 addr = plus_constant (Pmode, dst,
14085 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14086 mem = adjust_automodify_address (dstbase, SImode, addr,
14087 dstoffset + j * UNITS_PER_WORD);
14088 if (dst_aligned)
14089 emit_move_insn (mem, regs[j]);
14090 else
14091 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14092 }
14093 dstoffset += words * UNITS_PER_WORD;
14094 }
14095
14096 remaining -= words * UNITS_PER_WORD;
14097
14098 gcc_assert (remaining < 4);
14099
14100 /* Copy a halfword if necessary. */
14101
14102 if (remaining >= 2)
14103 {
14104 halfword_tmp = gen_reg_rtx (SImode);
14105
14106 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14107 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14108 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14109
14110 /* Either write out immediately, or delay until we've loaded the last
14111 byte, depending on interleave factor. */
14112 if (interleave_factor == 1)
14113 {
14114 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14115 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14116 emit_insn (gen_unaligned_storehi (mem,
14117 gen_lowpart (HImode, halfword_tmp)));
14118 halfword_tmp = NULL;
14119 dstoffset += 2;
14120 }
14121
14122 remaining -= 2;
14123 srcoffset += 2;
14124 }
14125
14126 gcc_assert (remaining < 2);
14127
14128 /* Copy last byte. */
14129
14130 if ((remaining & 1) != 0)
14131 {
14132 byte_tmp = gen_reg_rtx (SImode);
14133
14134 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14135 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14136 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14137
14138 if (interleave_factor == 1)
14139 {
14140 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14141 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14142 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14143 byte_tmp = NULL;
14144 dstoffset++;
14145 }
14146
14147 remaining--;
14148 srcoffset++;
14149 }
14150
14151 /* Store last halfword if we haven't done so already. */
14152
14153 if (halfword_tmp)
14154 {
14155 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14156 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14157 emit_insn (gen_unaligned_storehi (mem,
14158 gen_lowpart (HImode, halfword_tmp)));
14159 dstoffset += 2;
14160 }
14161
14162 /* Likewise for last byte. */
14163
14164 if (byte_tmp)
14165 {
14166 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14167 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14168 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14169 dstoffset++;
14170 }
14171
14172 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14173 }
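
/* For example, a 7-byte copy with INTERLEAVE_FACTOR == 1 becomes one
   unaligned word load/store (bytes 0-3), no leftover whole words, one
   halfword load/store (bytes 4-5) and one byte load/store (byte 6), leaving
   REMAINING at 0 and SRCOFFSET == DSTOFFSET == 7 for the final assertion.
   With an aligned source or destination and a larger factor, the word
   copies use ldm/stm instead.  */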
14174
14175 /* From mips_adjust_block_mem:
14176
14177 Helper function for doing a loop-based block operation on memory
14178 reference MEM. Each iteration of the loop will operate on LENGTH
14179 bytes of MEM.
14180
14181 Create a new base register for use within the loop and point it to
14182 the start of MEM. Create a new memory reference that uses this
14183 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14184
14185 static void
14186 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14187 rtx *loop_mem)
14188 {
14189 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14190
14191 /* Although the new mem does not refer to a known location,
14192 it does keep up to LENGTH bytes of alignment. */
14193 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14194 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14195 }
14196
14197 /* From mips_block_move_loop:
14198
14199 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14200 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14201 the memory regions do not overlap. */
14202
14203 static void
14204 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14205 unsigned int interleave_factor,
14206 HOST_WIDE_INT bytes_per_iter)
14207 {
14208 rtx src_reg, dest_reg, final_src, test;
14209 HOST_WIDE_INT leftover;
14210
14211 leftover = length % bytes_per_iter;
14212 length -= leftover;
14213
14214 /* Create registers and memory references for use within the loop. */
14215 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14216 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14217
14218 /* Calculate the value that SRC_REG should have after the last iteration of
14219 the loop. */
14220 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14221 0, 0, OPTAB_WIDEN);
14222
14223 /* Emit the start of the loop. */
14224 rtx_code_label *label = gen_label_rtx ();
14225 emit_label (label);
14226
14227 /* Emit the loop body. */
14228 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14229 interleave_factor);
14230
14231 /* Move on to the next block. */
14232 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14233 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14234
14235 /* Emit the loop condition. */
14236 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14237 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14238
14239 /* Mop up any left-over bytes. */
14240 if (leftover)
14241 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14242 }
14243
14244 /* Emit a block move when either the source or destination is unaligned (not
14245 aligned to a four-byte boundary). This may need further tuning depending on
14246 core type, optimize_size setting, etc. */
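/* Purely illustrative sketch of the kind of sequence this produces: a
   7-byte copy, when optimizing for size with neither side word aligned
   (so interleave_factor == 1), becomes roughly

	ldr	rT, [src]		@ one unaligned word
	str	rT, [dst]
	ldrh	rT, [src, #4]		@ then a halfword
	strh	rT, [dst, #4]
	ldrb	rT, [src, #6]		@ and the final byte
	strb	rT, [dst, #6]

   via the unaligned load/store patterns; rT stands for whatever scratch
   registers are allocated, and scheduling may reorder the accesses. */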
14247
14248 static int
14249 arm_movmemqi_unaligned (rtx *operands)
14250 {
14251 HOST_WIDE_INT length = INTVAL (operands[2]);
14252
14253 if (optimize_size)
14254 {
14255 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14256 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14257 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14258 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14259 or dst_aligned though: allow more interleaving in those cases since the
14260 resulting code can be smaller. */
14261 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14262 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14263
14264 if (length > 12)
14265 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14266 interleave_factor, bytes_per_iter);
14267 else
14268 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14269 interleave_factor);
14270 }
14271 else
14272 {
14273 /* Note that the loop created by arm_block_move_unaligned_loop may be
14274 subject to loop unrolling, which makes tuning this condition a little
14275 redundant. */
14276 if (length > 32)
14277 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14278 else
14279 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14280 }
14281
14282 return 1;
14283 }
14284
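/* Expand a movmemqi block copy using core registers.  When unaligned access
   is available and the operands are not word aligned, the work is handed off
   to arm_movmemqi_unaligned below.  Purely as an illustration: a 16-byte
   copy with both addresses word aligned and no trailing bytes boils down to
   one load-multiple/store-multiple pair, roughly

	ldmia	rsrc, {rA, rB, rC, rD}
	stmia	rdst, {rA, rB, rC, rD}

   with the register set taken from arm_regs_in_sequence; longer copies
   repeat the pair with write-back addressing, and any trailing 1-3 bytes
   are finished off by the halfword/byte stores at the end. */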
14285 int
14286 arm_gen_movmemqi (rtx *operands)
14287 {
14288 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14289 HOST_WIDE_INT srcoffset, dstoffset;
14290 rtx src, dst, srcbase, dstbase;
14291 rtx part_bytes_reg = NULL;
14292 rtx mem;
14293
14294 if (!CONST_INT_P (operands[2])
14295 || !CONST_INT_P (operands[3])
14296 || INTVAL (operands[2]) > 64)
14297 return 0;
14298
14299 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14300 return arm_movmemqi_unaligned (operands);
14301
14302 if (INTVAL (operands[3]) & 3)
14303 return 0;
14304
14305 dstbase = operands[0];
14306 srcbase = operands[1];
14307
14308 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14309 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14310
14311 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14312 out_words_to_go = INTVAL (operands[2]) / 4;
14313 last_bytes = INTVAL (operands[2]) & 3;
14314 dstoffset = srcoffset = 0;
14315
14316 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14317 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14318
14319 while (in_words_to_go >= 2)
14320 {
14321 if (in_words_to_go > 4)
14322 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14323 TRUE, srcbase, &srcoffset));
14324 else
14325 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14326 src, FALSE, srcbase,
14327 &srcoffset));
14328
14329 if (out_words_to_go)
14330 {
14331 if (out_words_to_go > 4)
14332 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14333 TRUE, dstbase, &dstoffset));
14334 else if (out_words_to_go != 1)
14335 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14336 out_words_to_go, dst,
14337 (last_bytes == 0
14338 ? FALSE : TRUE),
14339 dstbase, &dstoffset));
14340 else
14341 {
14342 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14343 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14344 if (last_bytes != 0)
14345 {
14346 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14347 dstoffset += 4;
14348 }
14349 }
14350 }
14351
14352 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14353 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14354 }
14355
14356 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14357 if (out_words_to_go)
14358 {
14359 rtx sreg;
14360
14361 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14362 sreg = copy_to_reg (mem);
14363
14364 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14365 emit_move_insn (mem, sreg);
14366 in_words_to_go--;
14367
14368 gcc_assert (!in_words_to_go); /* Sanity check */
14369 }
14370
14371 if (in_words_to_go)
14372 {
14373 gcc_assert (in_words_to_go > 0);
14374
14375 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14376 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14377 }
14378
14379 gcc_assert (!last_bytes || part_bytes_reg);
14380
14381 if (BYTES_BIG_ENDIAN && last_bytes)
14382 {
14383 rtx tmp = gen_reg_rtx (SImode);
14384
14385 /* The bytes we want are in the top end of the word. */
14386 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14387 GEN_INT (8 * (4 - last_bytes))));
14388 part_bytes_reg = tmp;
14389
14390 while (last_bytes)
14391 {
14392 mem = adjust_automodify_address (dstbase, QImode,
14393 plus_constant (Pmode, dst,
14394 last_bytes - 1),
14395 dstoffset + last_bytes - 1);
14396 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14397
14398 if (--last_bytes)
14399 {
14400 tmp = gen_reg_rtx (SImode);
14401 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14402 part_bytes_reg = tmp;
14403 }
14404 }
14405
14406 }
14407 else
14408 {
14409 if (last_bytes > 1)
14410 {
14411 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14412 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14413 last_bytes -= 2;
14414 if (last_bytes)
14415 {
14416 rtx tmp = gen_reg_rtx (SImode);
14417 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14418 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14419 part_bytes_reg = tmp;
14420 dstoffset += 2;
14421 }
14422 }
14423
14424 if (last_bytes)
14425 {
14426 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14427 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14428 }
14429 }
14430
14431 return 1;
14432 }
14433
14434 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14435 by mode size. */
14436 inline static rtx
14437 next_consecutive_mem (rtx mem)
14438 {
14439 machine_mode mode = GET_MODE (mem);
14440 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14441 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14442
14443 return adjust_automodify_address (mem, mode, addr, offset);
14444 }
14445
14446 /* Copy using LDRD/STRD instructions whenever possible.
14447 Returns true upon success. */
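/* Illustrative only: with both SRC and DST word aligned, a 16-byte copy is
   expanded by the word loop below as two DImode register moves, which
   normally end up as

	ldrd	rA, rB, [rsrc]
	strd	rA, rB, [rdst]
	ldrd	rA, rB, [rsrc, #8]
	strd	rA, rB, [rdst, #8]

   whereas an unaligned side is accessed as pairs of unaligned LDR or STR
   instructions on the SImode halves instead. */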
14448 bool
14449 gen_movmem_ldrd_strd (rtx *operands)
14450 {
14451 unsigned HOST_WIDE_INT len;
14452 HOST_WIDE_INT align;
14453 rtx src, dst, base;
14454 rtx reg0;
14455 bool src_aligned, dst_aligned;
14456 bool src_volatile, dst_volatile;
14457
14458 gcc_assert (CONST_INT_P (operands[2]));
14459 gcc_assert (CONST_INT_P (operands[3]));
14460
14461 len = UINTVAL (operands[2]);
14462 if (len > 64)
14463 return false;
14464
14465 /* Maximum alignment we can assume for both src and dst buffers. */
14466 align = INTVAL (operands[3]);
14467
14468 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14469 return false;
14470
14471 /* Place src and dst addresses in registers
14472 and update the corresponding mem rtx. */
14473 dst = operands[0];
14474 dst_volatile = MEM_VOLATILE_P (dst);
14475 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14476 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14477 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14478
14479 src = operands[1];
14480 src_volatile = MEM_VOLATILE_P (src);
14481 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14482 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14483 src = adjust_automodify_address (src, VOIDmode, base, 0);
14484
14485 if (!unaligned_access && !(src_aligned && dst_aligned))
14486 return false;
14487
14488 if (src_volatile || dst_volatile)
14489 return false;
14490
14491 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14492 if (!(dst_aligned || src_aligned))
14493 return arm_gen_movmemqi (operands);
14494
14495 /* If either src or dst is unaligned we'll be accessing it as pairs
14496 of unaligned SImode accesses. Otherwise we can generate DImode
14497 ldrd/strd instructions. */
14498 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14499 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14500
14501 while (len >= 8)
14502 {
14503 len -= 8;
14504 reg0 = gen_reg_rtx (DImode);
14505 rtx low_reg = NULL_RTX;
14506 rtx hi_reg = NULL_RTX;
14507
14508 if (!src_aligned || !dst_aligned)
14509 {
14510 low_reg = gen_lowpart (SImode, reg0);
14511 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14512 }
14513 if (src_aligned)
14514 emit_move_insn (reg0, src);
14515 else
14516 {
14517 emit_insn (gen_unaligned_loadsi (low_reg, src));
14518 src = next_consecutive_mem (src);
14519 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14520 }
14521
14522 if (dst_aligned)
14523 emit_move_insn (dst, reg0);
14524 else
14525 {
14526 emit_insn (gen_unaligned_storesi (dst, low_reg));
14527 dst = next_consecutive_mem (dst);
14528 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14529 }
14530
14531 src = next_consecutive_mem (src);
14532 dst = next_consecutive_mem (dst);
14533 }
14534
14535 gcc_assert (len < 8);
14536 if (len >= 4)
14537 {
14538 /* At least a word but less than a double-word left to copy. Copy one word. */
14539 reg0 = gen_reg_rtx (SImode);
14540 src = adjust_address (src, SImode, 0);
14541 dst = adjust_address (dst, SImode, 0);
14542 if (src_aligned)
14543 emit_move_insn (reg0, src);
14544 else
14545 emit_insn (gen_unaligned_loadsi (reg0, src));
14546
14547 if (dst_aligned)
14548 emit_move_insn (dst, reg0);
14549 else
14550 emit_insn (gen_unaligned_storesi (dst, reg0));
14551
14552 src = next_consecutive_mem (src);
14553 dst = next_consecutive_mem (dst);
14554 len -= 4;
14555 }
14556
14557 if (len == 0)
14558 return true;
14559
14560 /* Copy the remaining bytes. */
14561 if (len >= 2)
14562 {
14563 dst = adjust_address (dst, HImode, 0);
14564 src = adjust_address (src, HImode, 0);
14565 reg0 = gen_reg_rtx (SImode);
14566 if (src_aligned)
14567 emit_insn (gen_zero_extendhisi2 (reg0, src));
14568 else
14569 emit_insn (gen_unaligned_loadhiu (reg0, src));
14570
14571 if (dst_aligned)
14572 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14573 else
14574 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14575
14576 src = next_consecutive_mem (src);
14577 dst = next_consecutive_mem (dst);
14578 if (len == 2)
14579 return true;
14580 }
14581
14582 dst = adjust_address (dst, QImode, 0);
14583 src = adjust_address (src, QImode, 0);
14584 reg0 = gen_reg_rtx (QImode);
14585 emit_move_insn (reg0, src);
14586 emit_move_insn (dst, reg0);
14587 return true;
14588 }
14589
14590 /* Select a dominance comparison mode if possible for a test of the general
14591 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14592 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14593 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14594 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14595 In all cases OP will be either EQ or NE, but we don't need to know which
14596 here. If we are unable to support a dominance comparison we return
14597 CC mode. This will then fail to match for the RTL expressions that
14598 generate this call. */
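/* For example (illustrative, and assuming the individual comparisons are
   simple enough that arm_select_cc_mode returns CCmode for each of them):
   with COND_OR == DOM_CC_X_OR_Y, an EQ comparison for X and a GE comparison
   for Y gives CC_DGEmode, since EQ dominates GE; pairing LT with GTU gives
   plain CCmode because neither condition dominates the other, and the
   calling pattern then fails to match. */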
14599 machine_mode
14600 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14601 {
14602 enum rtx_code cond1, cond2;
14603 int swapped = 0;
14604
14605 /* Currently we will probably get the wrong result if the individual
14606 comparisons are not simple. This also ensures that it is safe to
14607 reverse a comparison if necessary. */
14608 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14609 != CCmode)
14610 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14611 != CCmode))
14612 return CCmode;
14613
14614 /* The if_then_else variant of this tests the second condition if the
14615 first passes, but is true if the first fails. Reverse the first
14616 condition to get a true "inclusive-or" expression. */
14617 if (cond_or == DOM_CC_NX_OR_Y)
14618 cond1 = reverse_condition (cond1);
14619
14620 /* If the comparisons are not equal, and one doesn't dominate the other,
14621 then we can't do this. */
14622 if (cond1 != cond2
14623 && !comparison_dominates_p (cond1, cond2)
14624 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14625 return CCmode;
14626
14627 if (swapped)
14628 std::swap (cond1, cond2);
14629
14630 switch (cond1)
14631 {
14632 case EQ:
14633 if (cond_or == DOM_CC_X_AND_Y)
14634 return CC_DEQmode;
14635
14636 switch (cond2)
14637 {
14638 case EQ: return CC_DEQmode;
14639 case LE: return CC_DLEmode;
14640 case LEU: return CC_DLEUmode;
14641 case GE: return CC_DGEmode;
14642 case GEU: return CC_DGEUmode;
14643 default: gcc_unreachable ();
14644 }
14645
14646 case LT:
14647 if (cond_or == DOM_CC_X_AND_Y)
14648 return CC_DLTmode;
14649
14650 switch (cond2)
14651 {
14652 case LT:
14653 return CC_DLTmode;
14654 case LE:
14655 return CC_DLEmode;
14656 case NE:
14657 return CC_DNEmode;
14658 default:
14659 gcc_unreachable ();
14660 }
14661
14662 case GT:
14663 if (cond_or == DOM_CC_X_AND_Y)
14664 return CC_DGTmode;
14665
14666 switch (cond2)
14667 {
14668 case GT:
14669 return CC_DGTmode;
14670 case GE:
14671 return CC_DGEmode;
14672 case NE:
14673 return CC_DNEmode;
14674 default:
14675 gcc_unreachable ();
14676 }
14677
14678 case LTU:
14679 if (cond_or == DOM_CC_X_AND_Y)
14680 return CC_DLTUmode;
14681
14682 switch (cond2)
14683 {
14684 case LTU:
14685 return CC_DLTUmode;
14686 case LEU:
14687 return CC_DLEUmode;
14688 case NE:
14689 return CC_DNEmode;
14690 default:
14691 gcc_unreachable ();
14692 }
14693
14694 case GTU:
14695 if (cond_or == DOM_CC_X_AND_Y)
14696 return CC_DGTUmode;
14697
14698 switch (cond2)
14699 {
14700 case GTU:
14701 return CC_DGTUmode;
14702 case GEU:
14703 return CC_DGEUmode;
14704 case NE:
14705 return CC_DNEmode;
14706 default:
14707 gcc_unreachable ();
14708 }
14709
14710 /* The remaining cases only occur when both comparisons are the
14711 same. */
14712 case NE:
14713 gcc_assert (cond1 == cond2);
14714 return CC_DNEmode;
14715
14716 case LE:
14717 gcc_assert (cond1 == cond2);
14718 return CC_DLEmode;
14719
14720 case GE:
14721 gcc_assert (cond1 == cond2);
14722 return CC_DGEmode;
14723
14724 case LEU:
14725 gcc_assert (cond1 == cond2);
14726 return CC_DLEUmode;
14727
14728 case GEU:
14729 gcc_assert (cond1 == cond2);
14730 return CC_DGEUmode;
14731
14732 default:
14733 gcc_unreachable ();
14734 }
14735 }
14736
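/* Select the CC mode used to compare X against Y with operator OP.  A few
   illustrative results from the cases below: a QImode equality test gives
   CC_Zmode; an SImode LTU/GEU comparison of (a + b) against a gives
   CC_Cmode, i.e. a carry-flag test on the addition; and a DImode signed
   comparison gives CC_NCVmode. */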
14737 machine_mode
14738 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14739 {
14740 /* All floating point compares return CCFP if it is an equality
14741 comparison, and CCFPE otherwise. */
14742 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14743 {
14744 switch (op)
14745 {
14746 case EQ:
14747 case NE:
14748 case UNORDERED:
14749 case ORDERED:
14750 case UNLT:
14751 case UNLE:
14752 case UNGT:
14753 case UNGE:
14754 case UNEQ:
14755 case LTGT:
14756 return CCFPmode;
14757
14758 case LT:
14759 case LE:
14760 case GT:
14761 case GE:
14762 return CCFPEmode;
14763
14764 default:
14765 gcc_unreachable ();
14766 }
14767 }
14768
14769 /* A compare with a shifted operand. Because of canonicalization, the
14770 comparison will have to be swapped when we emit the assembler. */
14771 if (GET_MODE (y) == SImode
14772 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14773 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14774 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14775 || GET_CODE (x) == ROTATERT))
14776 return CC_SWPmode;
14777
14778 /* This operation is performed swapped, but since we only rely on the Z
14779 flag we don't need an additional mode. */
14780 if (GET_MODE (y) == SImode
14781 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14782 && GET_CODE (x) == NEG
14783 && (op == EQ || op == NE))
14784 return CC_Zmode;
14785
14786 /* This is a special case that is used by combine to allow a
14787 comparison of a shifted byte load to be split into a zero-extend
14788 followed by a comparison of the shifted integer (only valid for
14789 equalities and unsigned inequalities). */
14790 if (GET_MODE (x) == SImode
14791 && GET_CODE (x) == ASHIFT
14792 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14793 && GET_CODE (XEXP (x, 0)) == SUBREG
14794 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14795 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14796 && (op == EQ || op == NE
14797 || op == GEU || op == GTU || op == LTU || op == LEU)
14798 && CONST_INT_P (y))
14799 return CC_Zmode;
14800
14801 /* A construct for a conditional compare, if the false arm contains
14802 0, then both conditions must be true, otherwise either condition
14803 must be true. Not all conditions are possible, so CCmode is
14804 returned if it can't be done. */
14805 if (GET_CODE (x) == IF_THEN_ELSE
14806 && (XEXP (x, 2) == const0_rtx
14807 || XEXP (x, 2) == const1_rtx)
14808 && COMPARISON_P (XEXP (x, 0))
14809 && COMPARISON_P (XEXP (x, 1)))
14810 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14811 INTVAL (XEXP (x, 2)));
14812
14813 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14814 if (GET_CODE (x) == AND
14815 && (op == EQ || op == NE)
14816 && COMPARISON_P (XEXP (x, 0))
14817 && COMPARISON_P (XEXP (x, 1)))
14818 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14819 DOM_CC_X_AND_Y);
14820
14821 if (GET_CODE (x) == IOR
14822 && (op == EQ || op == NE)
14823 && COMPARISON_P (XEXP (x, 0))
14824 && COMPARISON_P (XEXP (x, 1)))
14825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14826 DOM_CC_X_OR_Y);
14827
14828 /* An operation (on Thumb) where we want to test for a single bit.
14829 This is done by shifting that bit up into the top bit of a
14830 scratch register; we can then branch on the sign bit. */
14831 if (TARGET_THUMB1
14832 && GET_MODE (x) == SImode
14833 && (op == EQ || op == NE)
14834 && GET_CODE (x) == ZERO_EXTRACT
14835 && XEXP (x, 1) == const1_rtx)
14836 return CC_Nmode;
14837
14838 /* An operation that sets the condition codes as a side-effect, the
14839 V flag is not set correctly, so we can only use comparisons where
14840 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14841 instead.) */
14842 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14843 if (GET_MODE (x) == SImode
14844 && y == const0_rtx
14845 && (op == EQ || op == NE || op == LT || op == GE)
14846 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14847 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14848 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14849 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14850 || GET_CODE (x) == LSHIFTRT
14851 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14852 || GET_CODE (x) == ROTATERT
14853 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14854 return CC_NOOVmode;
14855
14856 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14857 return CC_Zmode;
14858
14859 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14860 && GET_CODE (x) == PLUS
14861 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14862 return CC_Cmode;
14863
14864 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14865 {
14866 switch (op)
14867 {
14868 case EQ:
14869 case NE:
14870 /* A DImode comparison against zero can be implemented by
14871 or'ing the two halves together. */
14872 if (y == const0_rtx)
14873 return CC_Zmode;
14874
14875 /* We can do an equality test in three Thumb instructions. */
14876 if (!TARGET_32BIT)
14877 return CC_Zmode;
14878
14879 /* FALLTHROUGH */
14880
14881 case LTU:
14882 case LEU:
14883 case GTU:
14884 case GEU:
14885 /* DImode unsigned comparisons can be implemented by cmp +
14886 cmpeq without a scratch register. Not worth doing in
14887 Thumb-2. */
14888 if (TARGET_32BIT)
14889 return CC_CZmode;
14890
14891 /* FALLTHROUGH */
14892
14893 case LT:
14894 case LE:
14895 case GT:
14896 case GE:
14897 /* DImode signed and unsigned comparisons can be implemented
14898 by cmp + sbcs with a scratch register, but that does not
14899 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14900 gcc_assert (op != EQ && op != NE);
14901 return CC_NCVmode;
14902
14903 default:
14904 gcc_unreachable ();
14905 }
14906 }
14907
14908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14909 return GET_MODE (x);
14910
14911 return CCmode;
14912 }
14913
14914 /* X and Y are two things to compare using CODE. Emit the compare insn and
14915 return the rtx for the CC register in the proper mode. SCRATCH is a
14916 scratch register that some DImode comparisons need after reload. */
14917 rtx
14918 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14919 {
14920 machine_mode mode;
14921 rtx cc_reg;
14922 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14923
14924 /* We might have X as a constant, Y as a register because of the predicates
14925 used for cmpdi. If so, force X to a register here. */
14926 if (dimode_comparison && !REG_P (x))
14927 x = force_reg (DImode, x);
14928
14929 mode = SELECT_CC_MODE (code, x, y);
14930 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14931
14932 if (dimode_comparison
14933 && mode != CC_CZmode)
14934 {
14935 rtx clobber, set;
14936
14937 /* To compare two non-zero values for equality, XOR them and
14938 then compare against zero. Not used for ARM mode; there
14939 CC_CZmode is cheaper. */
14940 if (mode == CC_Zmode && y != const0_rtx)
14941 {
14942 gcc_assert (!reload_completed);
14943 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14944 y = const0_rtx;
14945 }
14946
14947 /* A scratch register is required. */
14948 if (reload_completed)
14949 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14950 else
14951 scratch = gen_rtx_SCRATCH (SImode);
14952
14953 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14954 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14955 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14956 }
14957 else
14958 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14959
14960 return cc_reg;
14961 }
14962
14963 /* Generate a sequence of insns that will generate the correct return
14964 address mask depending on the physical architecture that the program
14965 is running on. */
14966 rtx
14967 arm_gen_return_addr_mask (void)
14968 {
14969 rtx reg = gen_reg_rtx (Pmode);
14970
14971 emit_insn (gen_return_addr_mask (reg));
14972 return reg;
14973 }
14974
14975 void
14976 arm_reload_in_hi (rtx *operands)
14977 {
14978 rtx ref = operands[1];
14979 rtx base, scratch;
14980 HOST_WIDE_INT offset = 0;
14981
14982 if (GET_CODE (ref) == SUBREG)
14983 {
14984 offset = SUBREG_BYTE (ref);
14985 ref = SUBREG_REG (ref);
14986 }
14987
14988 if (REG_P (ref))
14989 {
14990 /* We have a pseudo which has been spilt onto the stack; there
14991 are two cases here: the first where there is a simple
14992 stack-slot replacement and a second where the stack-slot is
14993 out of range, or is used as a subreg. */
14994 if (reg_equiv_mem (REGNO (ref)))
14995 {
14996 ref = reg_equiv_mem (REGNO (ref));
14997 base = find_replacement (&XEXP (ref, 0));
14998 }
14999 else
15000 /* The slot is out of range, or was dressed up in a SUBREG. */
15001 base = reg_equiv_address (REGNO (ref));
15002
15003 /* PR 62554: If there is no equivalent memory location then just move
15004 the value as an SImode register move. This happens when the target
15005 architecture variant does not have an HImode register move. */
15006 if (base == NULL)
15007 {
15008 gcc_assert (REG_P (operands[0]));
15009 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15010 gen_rtx_SUBREG (SImode, ref, 0)));
15011 return;
15012 }
15013 }
15014 else
15015 base = find_replacement (&XEXP (ref, 0));
15016
15017 /* Handle the case where the address is too complex to be offset by 1. */
15018 if (GET_CODE (base) == MINUS
15019 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15020 {
15021 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15022
15023 emit_set_insn (base_plus, base);
15024 base = base_plus;
15025 }
15026 else if (GET_CODE (base) == PLUS)
15027 {
15028 /* The addend must be CONST_INT, or we would have dealt with it above. */
15029 HOST_WIDE_INT hi, lo;
15030
15031 offset += INTVAL (XEXP (base, 1));
15032 base = XEXP (base, 0);
15033
15034 /* Rework the address into a legal sequence of insns. */
15035 /* Valid range for lo is -4095 -> 4095 */
15036 lo = (offset >= 0
15037 ? (offset & 0xfff)
15038 : -((-offset) & 0xfff));
15039
15040 /* Corner case, if lo is the max offset then we would be out of range
15041 once we have added the additional 1 below, so bump the msb into the
15042 pre-loading insn(s). */
15043 if (lo == 4095)
15044 lo &= 0x7ff;
15045
15046 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15047 ^ (HOST_WIDE_INT) 0x80000000)
15048 - (HOST_WIDE_INT) 0x80000000);
15049
15050 gcc_assert (hi + lo == offset);
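/* Worked example (illustrative values only): offset 0x1234 splits into
   lo = 0x234 and hi = 0x1000, while offset -0x1234 splits into lo = -0x234
   and hi = -0x1000, so base + hi + lo always reconstructs the original
   address exactly. */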
15051
15052 if (hi != 0)
15053 {
15054 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15055
15056 /* Get the base address; addsi3 knows how to handle constants
15057 that require more than one insn. */
15058 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15059 base = base_plus;
15060 offset = lo;
15061 }
15062 }
15063
15064 /* Operands[2] may overlap operands[0] (though it won't overlap
15065 operands[1]), that's why we asked for a DImode reg -- so we can
15066 use the bit that does not overlap. */
15067 if (REGNO (operands[2]) == REGNO (operands[0]))
15068 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15069 else
15070 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15071
15072 emit_insn (gen_zero_extendqisi2 (scratch,
15073 gen_rtx_MEM (QImode,
15074 plus_constant (Pmode, base,
15075 offset))));
15076 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15077 gen_rtx_MEM (QImode,
15078 plus_constant (Pmode, base,
15079 offset + 1))));
15080 if (!BYTES_BIG_ENDIAN)
15081 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15082 gen_rtx_IOR (SImode,
15083 gen_rtx_ASHIFT
15084 (SImode,
15085 gen_rtx_SUBREG (SImode, operands[0], 0),
15086 GEN_INT (8)),
15087 scratch));
15088 else
15089 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15090 gen_rtx_IOR (SImode,
15091 gen_rtx_ASHIFT (SImode, scratch,
15092 GEN_INT (8)),
15093 gen_rtx_SUBREG (SImode, operands[0], 0)));
15094 }
15095
15096 /* Handle storing a half-word to memory during reload by synthesizing as two
15097 byte stores. Take care not to clobber the input values until after we
15098 have moved them somewhere safe. This code assumes that if the DImode
15099 scratch in operands[2] overlaps either the input value or output address
15100 in some way, then that value must die in this insn (we absolutely need
15101 two scratch registers for some corner cases). */
15102 void
15103 arm_reload_out_hi (rtx *operands)
15104 {
15105 rtx ref = operands[0];
15106 rtx outval = operands[1];
15107 rtx base, scratch;
15108 HOST_WIDE_INT offset = 0;
15109
15110 if (GET_CODE (ref) == SUBREG)
15111 {
15112 offset = SUBREG_BYTE (ref);
15113 ref = SUBREG_REG (ref);
15114 }
15115
15116 if (REG_P (ref))
15117 {
15118 /* We have a pseudo which has been spilt onto the stack; there
15119 are two cases here: the first where there is a simple
15120 stack-slot replacement and a second where the stack-slot is
15121 out of range, or is used as a subreg. */
15122 if (reg_equiv_mem (REGNO (ref)))
15123 {
15124 ref = reg_equiv_mem (REGNO (ref));
15125 base = find_replacement (&XEXP (ref, 0));
15126 }
15127 else
15128 /* The slot is out of range, or was dressed up in a SUBREG. */
15129 base = reg_equiv_address (REGNO (ref));
15130
15131 /* PR 62254: If there is no equivalent memory location then just move
15132 the value as an SImode register move. This happens when the target
15133 architecture variant does not have an HImode register move. */
15134 if (base == NULL)
15135 {
15136 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15137
15138 if (REG_P (outval))
15139 {
15140 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15141 gen_rtx_SUBREG (SImode, outval, 0)));
15142 }
15143 else /* SUBREG_P (outval) */
15144 {
15145 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15146 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15147 SUBREG_REG (outval)));
15148 else
15149 /* FIXME: Handle other cases ? */
15150 gcc_unreachable ();
15151 }
15152 return;
15153 }
15154 }
15155 else
15156 base = find_replacement (&XEXP (ref, 0));
15157
15158 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15159
15160 /* Handle the case where the address is too complex to be offset by 1. */
15161 if (GET_CODE (base) == MINUS
15162 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15163 {
15164 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15165
15166 /* Be careful not to destroy OUTVAL. */
15167 if (reg_overlap_mentioned_p (base_plus, outval))
15168 {
15169 /* Updating base_plus might destroy outval, see if we can
15170 swap the scratch and base_plus. */
15171 if (!reg_overlap_mentioned_p (scratch, outval))
15172 std::swap (scratch, base_plus);
15173 else
15174 {
15175 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15176
15177 /* Be conservative and copy OUTVAL into the scratch now,
15178 this should only be necessary if outval is a subreg
15179 of something larger than a word. */
15180 /* XXX Might this clobber base? I can't see how it can,
15181 since scratch is known to overlap with OUTVAL, and
15182 must be wider than a word. */
15183 emit_insn (gen_movhi (scratch_hi, outval));
15184 outval = scratch_hi;
15185 }
15186 }
15187
15188 emit_set_insn (base_plus, base);
15189 base = base_plus;
15190 }
15191 else if (GET_CODE (base) == PLUS)
15192 {
15193 /* The addend must be CONST_INT, or we would have dealt with it above. */
15194 HOST_WIDE_INT hi, lo;
15195
15196 offset += INTVAL (XEXP (base, 1));
15197 base = XEXP (base, 0);
15198
15199 /* Rework the address into a legal sequence of insns. */
15200 /* Valid range for lo is -4095 -> 4095 */
15201 lo = (offset >= 0
15202 ? (offset & 0xfff)
15203 : -((-offset) & 0xfff));
15204
15205 /* Corner case, if lo is the max offset then we would be out of range
15206 once we have added the additional 1 below, so bump the msb into the
15207 pre-loading insn(s). */
15208 if (lo == 4095)
15209 lo &= 0x7ff;
15210
15211 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15212 ^ (HOST_WIDE_INT) 0x80000000)
15213 - (HOST_WIDE_INT) 0x80000000);
15214
15215 gcc_assert (hi + lo == offset);
15216
15217 if (hi != 0)
15218 {
15219 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15220
15221 /* Be careful not to destroy OUTVAL. */
15222 if (reg_overlap_mentioned_p (base_plus, outval))
15223 {
15224 /* Updating base_plus might destroy outval, see if we
15225 can swap the scratch and base_plus. */
15226 if (!reg_overlap_mentioned_p (scratch, outval))
15227 std::swap (scratch, base_plus);
15228 else
15229 {
15230 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15231
15232 /* Be conservative and copy outval into scratch now,
15233 this should only be necessary if outval is a
15234 subreg of something larger than a word. */
15235 /* XXX Might this clobber base? I can't see how it
15236 can, since scratch is known to overlap with
15237 outval. */
15238 emit_insn (gen_movhi (scratch_hi, outval));
15239 outval = scratch_hi;
15240 }
15241 }
15242
15243 /* Get the base address; addsi3 knows how to handle constants
15244 that require more than one insn. */
15245 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15246 base = base_plus;
15247 offset = lo;
15248 }
15249 }
15250
15251 if (BYTES_BIG_ENDIAN)
15252 {
15253 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15254 plus_constant (Pmode, base,
15255 offset + 1)),
15256 gen_lowpart (QImode, outval)));
15257 emit_insn (gen_lshrsi3 (scratch,
15258 gen_rtx_SUBREG (SImode, outval, 0),
15259 GEN_INT (8)));
15260 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15261 offset)),
15262 gen_lowpart (QImode, scratch)));
15263 }
15264 else
15265 {
15266 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15267 offset)),
15268 gen_lowpart (QImode, outval)));
15269 emit_insn (gen_lshrsi3 (scratch,
15270 gen_rtx_SUBREG (SImode, outval, 0),
15271 GEN_INT (8)));
15272 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15273 plus_constant (Pmode, base,
15274 offset + 1)),
15275 gen_lowpart (QImode, scratch)));
15276 }
15277 }
15278
15279 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15280 (padded to the size of a word) should be passed in a register. */
15281
15282 static bool
15283 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15284 {
15285 if (TARGET_AAPCS_BASED)
15286 return must_pass_in_stack_var_size (mode, type);
15287 else
15288 return must_pass_in_stack_var_size_or_pad (mode, type);
15289 }
15290
15291
15292 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15293 byte of a stack argument has useful data. For legacy APCS ABIs we use
15294 the default. For AAPCS based ABIs small aggregate types are placed
15295 in the lowest memory address. */
15296
15297 static pad_direction
15298 arm_function_arg_padding (machine_mode mode, const_tree type)
15299 {
15300 if (!TARGET_AAPCS_BASED)
15301 return default_function_arg_padding (mode, type);
15302
15303 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15304 return PAD_DOWNWARD;
15305
15306 return PAD_UPWARD;
15307 }
15308
15309
15310 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15311 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15312 register has useful data, and return the opposite if the most
15313 significant byte does. */
15314
15315 bool
15316 arm_pad_reg_upward (machine_mode mode,
15317 tree type, int first ATTRIBUTE_UNUSED)
15318 {
15319 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15320 {
15321 /* For AAPCS, small aggregates, small fixed-point types,
15322 and small complex types are always padded upwards. */
15323 if (type)
15324 {
15325 if ((AGGREGATE_TYPE_P (type)
15326 || TREE_CODE (type) == COMPLEX_TYPE
15327 || FIXED_POINT_TYPE_P (type))
15328 && int_size_in_bytes (type) <= 4)
15329 return true;
15330 }
15331 else
15332 {
15333 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15334 && GET_MODE_SIZE (mode) <= 4)
15335 return true;
15336 }
15337 }
15338
15339 /* Otherwise, use default padding. */
15340 return !BYTES_BIG_ENDIAN;
15341 }
15342
15343 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15344 assuming that the address in the base register is word aligned. */
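/* For instance, under the checks below an offset of 1020 is accepted in
   Thumb-2 mode, 1022 is rejected there (not a multiple of 4), and ARM mode
   only accepts offsets in the range -255..255. */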
15345 bool
15346 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15347 {
15348 HOST_WIDE_INT max_offset;
15349
15350 /* The offset must be a multiple of 4 in Thumb-2 mode. */
15351 if (TARGET_THUMB2 && ((offset & 3) != 0))
15352 return false;
15353
15354 if (TARGET_THUMB2)
15355 max_offset = 1020;
15356 else if (TARGET_ARM)
15357 max_offset = 255;
15358 else
15359 return false;
15360
15361 return ((offset <= max_offset) && (offset >= -max_offset));
15362 }
15363
15364 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15365 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15366 Assumes that the address in the base register RN is word aligned. Pattern
15367 guarantees that both memory accesses use the same base register,
15368 the offsets are constants within the range, and the gap between the offsets is 4.
15369 If reload is complete, then check that the registers are legal. WBACK indicates whether
15370 address is updated. LOAD indicates whether memory access is load or store. */
15371 bool
15372 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15373 bool wback, bool load)
15374 {
15375 unsigned int t, t2, n;
15376
15377 if (!reload_completed)
15378 return true;
15379
15380 if (!offset_ok_for_ldrd_strd (offset))
15381 return false;
15382
15383 t = REGNO (rt);
15384 t2 = REGNO (rt2);
15385 n = REGNO (rn);
15386
15387 if ((TARGET_THUMB2)
15388 && ((wback && (n == t || n == t2))
15389 || (t == SP_REGNUM)
15390 || (t == PC_REGNUM)
15391 || (t2 == SP_REGNUM)
15392 || (t2 == PC_REGNUM)
15393 || (!load && (n == PC_REGNUM))
15394 || (load && (t == t2))
15395 /* Triggers the Cortex-M3 LDRD erratum. */
15396 || (!wback && load && fix_cm3_ldrd && (n == t))))
15397 return false;
15398
15399 if ((TARGET_ARM)
15400 && ((wback && (n == t || n == t2))
15401 || (t2 == PC_REGNUM)
15402 || (t % 2 != 0) /* First destination register is not even. */
15403 || (t2 != t + 1)
15404 /* PC can be used as base register (for offset addressing only),
15405 but it is deprecated. */
15406 || (n == PC_REGNUM)))
15407 return false;
15408
15409 return true;
15410 }
15411
15412 /* Return true if a 64-bit access with alignment ALIGN and with a
15413 constant offset OFFSET from the base pointer is permitted on this
15414 architecture. */
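/* For example, with unaligned access enabled a word-aligned access at
   offset 4 is permitted, whereas without it the access must be at least
   doubleword aligned and the offset a multiple of 8. */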
15415 static bool
15416 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15417 {
15418 return (unaligned_access
15419 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15420 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15421 }
15422
15423 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15424 operand MEM's address contains an immediate offset from the base
15425 register and has no side effects, in which case it sets BASE,
15426 OFFSET and ALIGN accordingly. */
15427 static bool
15428 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15429 {
15430 rtx addr;
15431
15432 gcc_assert (base != NULL && offset != NULL);
15433
15434 /* TODO: Handle more general memory operand patterns, such as
15435 PRE_DEC and PRE_INC. */
15436
15437 if (side_effects_p (mem))
15438 return false;
15439
15440 /* Can't deal with subregs. */
15441 if (GET_CODE (mem) == SUBREG)
15442 return false;
15443
15444 gcc_assert (MEM_P (mem));
15445
15446 *offset = const0_rtx;
15447 *align = MEM_ALIGN (mem);
15448
15449 addr = XEXP (mem, 0);
15450
15451 /* If addr isn't valid for DImode, then we can't handle it. */
15452 if (!arm_legitimate_address_p (DImode, addr,
15453 reload_in_progress || reload_completed))
15454 return false;
15455
15456 if (REG_P (addr))
15457 {
15458 *base = addr;
15459 return true;
15460 }
15461 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15462 {
15463 *base = XEXP (addr, 0);
15464 *offset = XEXP (addr, 1);
15465 return (REG_P (*base) && CONST_INT_P (*offset));
15466 }
15467
15468 return false;
15469 }
15470
15471 /* Called from a peephole2 to replace two word-size accesses with a
15472 single LDRD/STRD instruction. Returns true iff we can generate a
15473 new instruction sequence. That is, both accesses use the same base
15474 register and the gap between constant offsets is 4. This function
15475 may reorder its operands to match ldrd/strd RTL templates.
15476 OPERANDS are the operands found by the peephole matcher;
15477 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15478 corresponding memory operands. LOAD indicates whether the access
15479 is load or store. CONST_STORE indicates a store of constant
15480 integer values held in OPERANDS[4,5] and assumes that the pattern
15481 is four insns long, for the purpose of checking dead registers.
15482 COMMUTE indicates that register operands may be reordered. */
15483 bool
15484 gen_operands_ldrd_strd (rtx *operands, bool load,
15485 bool const_store, bool commute)
15486 {
15487 int nops = 2;
15488 HOST_WIDE_INT offsets[2], offset, align[2];
15489 rtx base = NULL_RTX;
15490 rtx cur_base, cur_offset, tmp;
15491 int i, gap;
15492 HARD_REG_SET regset;
15493
15494 gcc_assert (!const_store || !load);
15495 /* Check that the memory references are immediate offsets from the
15496 same base register. Extract the base register, the destination
15497 registers, and the corresponding memory offsets. */
15498 for (i = 0; i < nops; i++)
15499 {
15500 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15501 &align[i]))
15502 return false;
15503
15504 if (i == 0)
15505 base = cur_base;
15506 else if (REGNO (base) != REGNO (cur_base))
15507 return false;
15508
15509 offsets[i] = INTVAL (cur_offset);
15510 if (GET_CODE (operands[i]) == SUBREG)
15511 {
15512 tmp = SUBREG_REG (operands[i]);
15513 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15514 operands[i] = tmp;
15515 }
15516 }
15517
15518 /* Make sure there is no dependency between the individual loads. */
15519 if (load && REGNO (operands[0]) == REGNO (base))
15520 return false; /* RAW */
15521
15522 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15523 return false; /* WAW */
15524
15525 /* If the same input register is used in both stores
15526 when storing different constants, try to find a free register.
15527 For example, the code
15528 mov r0, 0
15529 str r0, [r2]
15530 mov r0, 1
15531 str r0, [r2, #4]
15532 can be transformed into
15533 mov r1, 0
15534 mov r0, 1
15535 strd r1, r0, [r2]
15536 in Thumb mode assuming that r1 is free.
15537 For ARM mode do the same but only if the starting register
15538 can be made to be even. */
15539 if (const_store
15540 && REGNO (operands[0]) == REGNO (operands[1])
15541 && INTVAL (operands[4]) != INTVAL (operands[5]))
15542 {
15543 if (TARGET_THUMB2)
15544 {
15545 CLEAR_HARD_REG_SET (regset);
15546 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15547 if (tmp == NULL_RTX)
15548 return false;
15549
15550 /* Use the new register in the first load to ensure that
15551 if the original input register is not dead after peephole,
15552 then it will have the correct constant value. */
15553 operands[0] = tmp;
15554 }
15555 else if (TARGET_ARM)
15556 {
15557 int regno = REGNO (operands[0]);
15558 if (!peep2_reg_dead_p (4, operands[0]))
15559 {
15560 /* When the input register is even and is not dead after the
15561 pattern, it has to hold the second constant but we cannot
15562 form a legal STRD in ARM mode with this register as the second
15563 register. */
15564 if (regno % 2 == 0)
15565 return false;
15566
15567 /* Is regno-1 free? */
15568 SET_HARD_REG_SET (regset);
15569 CLEAR_HARD_REG_BIT (regset, regno - 1);
15570 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15571 if (tmp == NULL_RTX)
15572 return false;
15573
15574 operands[0] = tmp;
15575 }
15576 else
15577 {
15578 /* Find a DImode register. */
15579 CLEAR_HARD_REG_SET (regset);
15580 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15581 if (tmp != NULL_RTX)
15582 {
15583 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15584 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15585 }
15586 else
15587 {
15588 /* Can we use the input register to form a DI register? */
15589 SET_HARD_REG_SET (regset);
15590 CLEAR_HARD_REG_BIT (regset,
15591 regno % 2 == 0 ? regno + 1 : regno - 1);
15592 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15593 if (tmp == NULL_RTX)
15594 return false;
15595 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15596 }
15597 }
15598
15599 gcc_assert (operands[0] != NULL_RTX);
15600 gcc_assert (operands[1] != NULL_RTX);
15601 gcc_assert (REGNO (operands[0]) % 2 == 0);
15602 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15603 }
15604 }
15605
15606 /* Make sure the instructions are ordered with lower memory access first. */
15607 if (offsets[0] > offsets[1])
15608 {
15609 gap = offsets[0] - offsets[1];
15610 offset = offsets[1];
15611
15612 /* Swap the instructions such that lower memory is accessed first. */
15613 std::swap (operands[0], operands[1]);
15614 std::swap (operands[2], operands[3]);
15615 std::swap (align[0], align[1]);
15616 if (const_store)
15617 std::swap (operands[4], operands[5]);
15618 }
15619 else
15620 {
15621 gap = offsets[1] - offsets[0];
15622 offset = offsets[0];
15623 }
15624
15625 /* Make sure accesses are to consecutive memory locations. */
15626 if (gap != 4)
15627 return false;
15628
15629 if (!align_ok_ldrd_strd (align[0], offset))
15630 return false;
15631
15632 /* Make sure we generate legal instructions. */
15633 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15634 false, load))
15635 return true;
15636
15637 /* In Thumb state, where registers are almost unconstrained, there
15638 is little hope to fix it. */
15639 if (TARGET_THUMB2)
15640 return false;
15641
15642 if (load && commute)
15643 {
15644 /* Try reordering registers. */
15645 std::swap (operands[0], operands[1]);
15646 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15647 false, load))
15648 return true;
15649 }
15650
15651 if (const_store)
15652 {
15653 /* If input registers are dead after this pattern, they can be
15654 reordered or replaced by other registers that are free in the
15655 current pattern. */
15656 if (!peep2_reg_dead_p (4, operands[0])
15657 || !peep2_reg_dead_p (4, operands[1]))
15658 return false;
15659
15660 /* Try to reorder the input registers. */
15661 /* For example, the code
15662 mov r0, 0
15663 mov r1, 1
15664 str r1, [r2]
15665 str r0, [r2, #4]
15666 can be transformed into
15667 mov r1, 0
15668 mov r0, 1
15669 strd r0, [r2]
15670 */
15671 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15672 false, false))
15673 {
15674 std::swap (operands[0], operands[1]);
15675 return true;
15676 }
15677
15678 /* Try to find a free DI register. */
15679 CLEAR_HARD_REG_SET (regset);
15680 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15681 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15682 while (true)
15683 {
15684 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15685 if (tmp == NULL_RTX)
15686 return false;
15687
15688 /* DREG must be an even-numbered register in DImode.
15689 Split it into SI registers. */
15690 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15691 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15692 gcc_assert (operands[0] != NULL_RTX);
15693 gcc_assert (operands[1] != NULL_RTX);
15694 gcc_assert (REGNO (operands[0]) % 2 == 0);
15695 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15696
15697 return (operands_ok_ldrd_strd (operands[0], operands[1],
15698 base, offset,
15699 false, load));
15700 }
15701 }
15702
15703 return false;
15704 }
15705
15706
15707
15708 \f
15709 /* Print a symbolic form of X to the debug file, F. */
15710 static void
15711 arm_print_value (FILE *f, rtx x)
15712 {
15713 switch (GET_CODE (x))
15714 {
15715 case CONST_INT:
15716 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15717 return;
15718
15719 case CONST_DOUBLE:
15720 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15721 return;
15722
15723 case CONST_VECTOR:
15724 {
15725 int i;
15726
15727 fprintf (f, "<");
15728 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15729 {
15730 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15731 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15732 fputc (',', f);
15733 }
15734 fprintf (f, ">");
15735 }
15736 return;
15737
15738 case CONST_STRING:
15739 fprintf (f, "\"%s\"", XSTR (x, 0));
15740 return;
15741
15742 case SYMBOL_REF:
15743 fprintf (f, "`%s'", XSTR (x, 0));
15744 return;
15745
15746 case LABEL_REF:
15747 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15748 return;
15749
15750 case CONST:
15751 arm_print_value (f, XEXP (x, 0));
15752 return;
15753
15754 case PLUS:
15755 arm_print_value (f, XEXP (x, 0));
15756 fprintf (f, "+");
15757 arm_print_value (f, XEXP (x, 1));
15758 return;
15759
15760 case PC:
15761 fprintf (f, "pc");
15762 return;
15763
15764 default:
15765 fprintf (f, "????");
15766 return;
15767 }
15768 }
15769 \f
15770 /* Routines for manipulation of the constant pool. */
15771
15772 /* Arm instructions cannot load a large constant directly into a
15773 register; they have to come from a pc relative load. The constant
15774 must therefore be placed in the addressable range of the pc
15775 relative load. Depending on the precise pc relative load
15776 instruction the range is somewhere between 256 bytes and 4k. This
15777 means that we often have to dump a constant inside a function, and
15778 generate code to branch around it.
15779
15780 It is important to minimize this, since the branches will slow
15781 things down and make the code larger.
15782
15783 Normally we can hide the table after an existing unconditional
15784 branch so that there is no interruption of the flow, but in the
15785 worst case the code looks like this:
15786
15787 ldr rn, L1
15788 ...
15789 b L2
15790 align
15791 L1: .long value
15792 L2:
15793 ...
15794
15795 ldr rn, L3
15796 ...
15797 b L4
15798 align
15799 L3: .long value
15800 L4:
15801 ...
15802
15803 We fix this by performing a scan after scheduling, which notices
15804 which instructions need to have their operands fetched from the
15805 constant table and builds the table.
15806
15807 The algorithm starts by building a table of all the constants that
15808 need fixing up and all the natural barriers in the function (places
15809 where a constant table can be dropped without breaking the flow).
15810 For each fixup we note how far the pc-relative replacement will be
15811 able to reach and the offset of the instruction into the function.
15812
15813 Having built the table we then group the fixes together to form
15814 tables that are as large as possible (subject to addressing
15815 constraints) and emit each table of constants after the last
15816 barrier that is within range of all the instructions in the group.
15817 If a group does not contain a barrier, then we forcibly create one
15818 by inserting a jump instruction into the flow. Once the table has
15819 been inserted, the insns are then modified to reference the
15820 relevant entry in the pool.
15821
15822 Possible enhancements to the algorithm (not implemented) are:
15823
15824 1) For some processors and object formats, there may be benefit in
15825 aligning the pools to the start of cache lines; this alignment
15826 would need to be taken into account when calculating addressability
15827 of a pool. */
15828
15829 /* These typedefs are located at the start of this file, so that
15830 they can be used in the prototypes there. This comment is to
15831 remind readers of that fact so that the following structures
15832 can be understood more easily.
15833
15834 typedef struct minipool_node Mnode;
15835 typedef struct minipool_fixup Mfix; */
15836
15837 struct minipool_node
15838 {
15839 /* Doubly linked chain of entries. */
15840 Mnode * next;
15841 Mnode * prev;
15842 /* The maximum offset into the code at which this entry can be placed. While
15843 pushing fixes for forward references, all entries are sorted in order
15844 of increasing max_address. */
15845 HOST_WIDE_INT max_address;
15846 /* Similarly for an entry inserted for a backwards ref. */
15847 HOST_WIDE_INT min_address;
15848 /* The number of fixes referencing this entry. This can become zero
15849 if we "unpush" an entry. In this case we ignore the entry when we
15850 come to emit the code. */
15851 int refcount;
15852 /* The offset from the start of the minipool. */
15853 HOST_WIDE_INT offset;
15854 /* The value in table. */
15855 rtx value;
15856 /* The mode of value. */
15857 machine_mode mode;
15858 /* The size of the value. With iWMMXt enabled
15859 sizes > 4 also imply an alignment of 8 bytes. */
15860 int fix_size;
15861 };
15862
15863 struct minipool_fixup
15864 {
15865 Mfix * next;
15866 rtx_insn * insn;
15867 HOST_WIDE_INT address;
15868 rtx * loc;
15869 machine_mode mode;
15870 int fix_size;
15871 rtx value;
15872 Mnode * minipool;
15873 HOST_WIDE_INT forwards;
15874 HOST_WIDE_INT backwards;
15875 };
15876
15877 /* Fixes less than a word need padding out to a word boundary. */
15878 #define MINIPOOL_FIX_SIZE(mode) \
15879 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
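/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the pool,
   while DImode and DFmode fixes keep their natural 8-byte size. */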
15880
15881 static Mnode * minipool_vector_head;
15882 static Mnode * minipool_vector_tail;
15883 static rtx_code_label *minipool_vector_label;
15884 static int minipool_pad;
15885
15886 /* The linked list of all minipool fixes required for this function. */
15887 Mfix * minipool_fix_head;
15888 Mfix * minipool_fix_tail;
15889 /* The fix entry for the current minipool, once it has been placed. */
15890 Mfix * minipool_barrier;
15891
15892 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15893 #define JUMP_TABLES_IN_TEXT_SECTION 0
15894 #endif
15895
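/* Return the number of bytes the jump table INSN will occupy in the text
   section, or zero if the table is emitted into a separate read-only data
   section instead.  Illustrative only: a 5-entry QImode ADDR_DIFF_VEC (a
   Thumb-2 TBB table) counts as 6 bytes after rounding up to a halfword,
   and a 3-entry SImode table on Thumb counts as 12 + 2 = 14 bytes,
   including the alignment padding. */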
15896 static HOST_WIDE_INT
15897 get_jump_table_size (rtx_jump_table_data *insn)
15898 {
15899 /* ADDR_VECs only take room if read-only data goes into the text
15900 section. */
15901 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15902 {
15903 rtx body = PATTERN (insn);
15904 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15905 HOST_WIDE_INT size;
15906 HOST_WIDE_INT modesize;
15907
15908 modesize = GET_MODE_SIZE (GET_MODE (body));
15909 size = modesize * XVECLEN (body, elt);
15910 switch (modesize)
15911 {
15912 case 1:
15913 /* Round up size of TBB table to a halfword boundary. */
15914 size = (size + 1) & ~HOST_WIDE_INT_1;
15915 break;
15916 case 2:
15917 /* No padding necessary for TBH. */
15918 break;
15919 case 4:
15920 /* Add two bytes for alignment on Thumb. */
15921 if (TARGET_THUMB)
15922 size += 2;
15923 break;
15924 default:
15925 gcc_unreachable ();
15926 }
15927 return size;
15928 }
15929
15930 return 0;
15931 }
15932
15933 /* Return the maximum amount of padding that will be inserted before
15934 label LABEL. */
15935
15936 static HOST_WIDE_INT
15937 get_label_padding (rtx label)
15938 {
15939 HOST_WIDE_INT align, min_insn_size;
15940
15941 align = 1 << label_to_alignment (label);
15942 min_insn_size = TARGET_THUMB ? 2 : 4;
15943 return align > min_insn_size ? align - min_insn_size : 0;
15944 }
15945
15946 /* Move a minipool fix MP from its current location to before MAX_MP.
15947 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15948 constraints may need updating. */
15949 static Mnode *
15950 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15951 HOST_WIDE_INT max_address)
15952 {
15953 /* The code below assumes these are different. */
15954 gcc_assert (mp != max_mp);
15955
15956 if (max_mp == NULL)
15957 {
15958 if (max_address < mp->max_address)
15959 mp->max_address = max_address;
15960 }
15961 else
15962 {
15963 if (max_address > max_mp->max_address - mp->fix_size)
15964 mp->max_address = max_mp->max_address - mp->fix_size;
15965 else
15966 mp->max_address = max_address;
15967
15968 /* Unlink MP from its current position. Since max_mp is non-null,
15969 mp->prev must be non-null. */
15970 mp->prev->next = mp->next;
15971 if (mp->next != NULL)
15972 mp->next->prev = mp->prev;
15973 else
15974 minipool_vector_tail = mp->prev;
15975
15976 /* Re-insert it before MAX_MP. */
15977 mp->next = max_mp;
15978 mp->prev = max_mp->prev;
15979 max_mp->prev = mp;
15980
15981 if (mp->prev != NULL)
15982 mp->prev->next = mp;
15983 else
15984 minipool_vector_head = mp;
15985 }
15986
15987 /* Save the new entry. */
15988 max_mp = mp;
15989
15990 /* Scan over the preceding entries and adjust their addresses as
15991 required. */
15992 while (mp->prev != NULL
15993 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15994 {
15995 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15996 mp = mp->prev;
15997 }
15998
15999 return max_mp;
16000 }
16001
16002 /* Add a constant to the minipool for a forward reference. Returns the
16003 node added or NULL if the constant will not fit in this pool. */
16004 static Mnode *
16005 add_minipool_forward_ref (Mfix *fix)
16006 {
16007 /* If set, max_mp is the first pool_entry that has a lower
16008 constraint than the one we are trying to add. */
16009 Mnode * max_mp = NULL;
16010 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16011 Mnode * mp;
16012
16013 /* If the minipool starts before the end of FIX->INSN then this FIX
16014 cannot be placed into the current pool. Furthermore, adding the
16015 new constant pool entry may cause the pool to start FIX_SIZE bytes
16016 earlier. */
16017 if (minipool_vector_head &&
16018 (fix->address + get_attr_length (fix->insn)
16019 >= minipool_vector_head->max_address - fix->fix_size))
16020 return NULL;
16021
16022 /* Scan the pool to see if a constant with the same value has
16023 already been added. While we are doing this, also note the
16024 location where we must insert the constant if it doesn't already
16025 exist. */
16026 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16027 {
16028 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16029 && fix->mode == mp->mode
16030 && (!LABEL_P (fix->value)
16031 || (CODE_LABEL_NUMBER (fix->value)
16032 == CODE_LABEL_NUMBER (mp->value)))
16033 && rtx_equal_p (fix->value, mp->value))
16034 {
16035 /* More than one fix references this entry. */
16036 mp->refcount++;
16037 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16038 }
16039
16040 /* Note the insertion point if necessary. */
16041 if (max_mp == NULL
16042 && mp->max_address > max_address)
16043 max_mp = mp;
16044
16045 /* If we are inserting an 8-byte aligned quantity and
16046 we have not already found an insertion point, then
16047 make sure that all such 8-byte aligned quantities are
16048 placed at the start of the pool. */
16049 if (ARM_DOUBLEWORD_ALIGN
16050 && max_mp == NULL
16051 && fix->fix_size >= 8
16052 && mp->fix_size < 8)
16053 {
16054 max_mp = mp;
16055 max_address = mp->max_address;
16056 }
16057 }
16058
16059 /* The value is not currently in the minipool, so we need to create
16060 a new entry for it. If MAX_MP is NULL, the entry will be put on
16061 the end of the list since the placement is less constrained than
16062 any existing entry. Otherwise, we insert the new fix before
16063 MAX_MP and, if necessary, adjust the constraints on the other
16064 entries. */
16065 mp = XNEW (Mnode);
16066 mp->fix_size = fix->fix_size;
16067 mp->mode = fix->mode;
16068 mp->value = fix->value;
16069 mp->refcount = 1;
16070 /* Not yet required for a backwards ref. */
16071 mp->min_address = -65536;
16072
16073 if (max_mp == NULL)
16074 {
16075 mp->max_address = max_address;
16076 mp->next = NULL;
16077 mp->prev = minipool_vector_tail;
16078
16079 if (mp->prev == NULL)
16080 {
16081 minipool_vector_head = mp;
16082 minipool_vector_label = gen_label_rtx ();
16083 }
16084 else
16085 mp->prev->next = mp;
16086
16087 minipool_vector_tail = mp;
16088 }
16089 else
16090 {
16091 if (max_address > max_mp->max_address - mp->fix_size)
16092 mp->max_address = max_mp->max_address - mp->fix_size;
16093 else
16094 mp->max_address = max_address;
16095
16096 mp->next = max_mp;
16097 mp->prev = max_mp->prev;
16098 max_mp->prev = mp;
16099 if (mp->prev != NULL)
16100 mp->prev->next = mp;
16101 else
16102 minipool_vector_head = mp;
16103 }
16104
16105 /* Save the new entry. */
16106 max_mp = mp;
16107
16108 /* Scan over the preceding entries and adjust their addresses as
16109 required. */
16110 while (mp->prev != NULL
16111 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16112 {
16113 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16114 mp = mp->prev;
16115 }
16116
16117 return max_mp;
16118 }
16119
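/* Counterpart of move_minipool_fix_forward_ref for a backwards reference:
   move the existing pool entry MP to just after MIN_MP, or, if MIN_MP is
   NULL, simply tighten MP's minimum-address constraint to at least
   MIN_ADDRESS.  The offsets and min_address constraints of the whole pool
   are then recomputed.  Returns the new MIN_MP.  */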
16120 static Mnode *
16121 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16122 HOST_WIDE_INT min_address)
16123 {
16124 HOST_WIDE_INT offset;
16125
16126 /* The code below assumes these are different. */
16127 gcc_assert (mp != min_mp);
16128
16129 if (min_mp == NULL)
16130 {
16131 if (min_address > mp->min_address)
16132 mp->min_address = min_address;
16133 }
16134 else
16135 {
16136 /* We will adjust this below if it is too loose. */
16137 mp->min_address = min_address;
16138
16139 /* Unlink MP from its current position. Since min_mp is non-null,
16140 mp->next must be non-null. */
16141 mp->next->prev = mp->prev;
16142 if (mp->prev != NULL)
16143 mp->prev->next = mp->next;
16144 else
16145 minipool_vector_head = mp->next;
16146
16147 /* Reinsert it after MIN_MP. */
16148 mp->prev = min_mp;
16149 mp->next = min_mp->next;
16150 min_mp->next = mp;
16151 if (mp->next != NULL)
16152 mp->next->prev = mp;
16153 else
16154 minipool_vector_tail = mp;
16155 }
16156
16157 min_mp = mp;
16158
16159 offset = 0;
16160 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16161 {
16162 mp->offset = offset;
16163 if (mp->refcount > 0)
16164 offset += mp->fix_size;
16165
16166 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16167 mp->next->min_address = mp->min_address + mp->fix_size;
16168 }
16169
16170 return min_mp;
16171 }
16172
16173 /* Add a constant to the minipool for a backward reference. Returns the
16174 node added or NULL if the constant will not fit in this pool.
16175
16176 Note that the code for insertion for a backwards reference can be
16177 somewhat confusing because the calculated offsets for each fix do
16178 not take into account the size of the pool (which is still under
16179 construction). */
16180 static Mnode *
16181 add_minipool_backward_ref (Mfix *fix)
16182 {
16183 /* If set, min_mp is the last pool_entry that has a lower constraint
16184 than the one we are trying to add. */
16185 Mnode *min_mp = NULL;
16186 /* This can be negative, since it is only a constraint. */
16187 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16188 Mnode *mp;
16189
16190 /* If we can't reach the current pool from this insn, or if we can't
16191 insert this entry at the end of the pool without pushing other
16192 fixes out of range, then we don't try. This ensures that we
16193 can't fail later on. */
16194 if (min_address >= minipool_barrier->address
16195 || (minipool_vector_tail->min_address + fix->fix_size
16196 >= minipool_barrier->address))
16197 return NULL;
16198
16199 /* Scan the pool to see if a constant with the same value has
16200 already been added. While we are doing this, also note the
16201 location where we must insert the constant if it doesn't already
16202 exist. */
16203 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16204 {
16205 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16206 && fix->mode == mp->mode
16207 && (!LABEL_P (fix->value)
16208 || (CODE_LABEL_NUMBER (fix->value)
16209 == CODE_LABEL_NUMBER (mp->value)))
16210 && rtx_equal_p (fix->value, mp->value)
16211 /* Check that there is enough slack to move this entry to the
16212 end of the table (this is conservative). */
16213 && (mp->max_address
16214 > (minipool_barrier->address
16215 + minipool_vector_tail->offset
16216 + minipool_vector_tail->fix_size)))
16217 {
16218 mp->refcount++;
16219 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16220 }
16221
16222 if (min_mp != NULL)
16223 mp->min_address += fix->fix_size;
16224 else
16225 {
16226 /* Note the insertion point if necessary. */
16227 if (mp->min_address < min_address)
16228 {
16229 /* For now, we do not allow the insertion of 8-byte alignment
16230 requiring nodes anywhere but at the start of the pool. */
16231 if (ARM_DOUBLEWORD_ALIGN
16232 && fix->fix_size >= 8 && mp->fix_size < 8)
16233 return NULL;
16234 else
16235 min_mp = mp;
16236 }
16237 else if (mp->max_address
16238 < minipool_barrier->address + mp->offset + fix->fix_size)
16239 {
16240 /* Inserting before this entry would push the fix beyond
16241 its maximum address (which can happen if we have
16242 re-located a forwards fix); force the new fix to come
16243 after it. */
16244 if (ARM_DOUBLEWORD_ALIGN
16245 && fix->fix_size >= 8 && mp->fix_size < 8)
16246 return NULL;
16247 else
16248 {
16249 min_mp = mp;
16250 min_address = mp->min_address + fix->fix_size;
16251 }
16252 }
16253 /* Do not insert a non-8-byte aligned quantity before 8-byte
16254 aligned quantities. */
16255 else if (ARM_DOUBLEWORD_ALIGN
16256 && fix->fix_size < 8
16257 && mp->fix_size >= 8)
16258 {
16259 min_mp = mp;
16260 min_address = mp->min_address + fix->fix_size;
16261 }
16262 }
16263 }
16264
16265 /* We need to create a new entry. */
16266 mp = XNEW (Mnode);
16267 mp->fix_size = fix->fix_size;
16268 mp->mode = fix->mode;
16269 mp->value = fix->value;
16270 mp->refcount = 1;
16271 mp->max_address = minipool_barrier->address + 65536;
16272
16273 mp->min_address = min_address;
16274
16275 if (min_mp == NULL)
16276 {
16277 mp->prev = NULL;
16278 mp->next = minipool_vector_head;
16279
16280 if (mp->next == NULL)
16281 {
16282 minipool_vector_tail = mp;
16283 minipool_vector_label = gen_label_rtx ();
16284 }
16285 else
16286 mp->next->prev = mp;
16287
16288 minipool_vector_head = mp;
16289 }
16290 else
16291 {
16292 mp->next = min_mp->next;
16293 mp->prev = min_mp;
16294 min_mp->next = mp;
16295
16296 if (mp->next != NULL)
16297 mp->next->prev = mp;
16298 else
16299 minipool_vector_tail = mp;
16300 }
16301
16302 /* Save the new entry. */
16303 min_mp = mp;
16304
16305 if (mp->prev)
16306 mp = mp->prev;
16307 else
16308 mp->offset = 0;
16309
16310 /* Scan over the following entries and adjust their offsets. */
16311 while (mp->next != NULL)
16312 {
16313 if (mp->next->min_address < mp->min_address + mp->fix_size)
16314 mp->next->min_address = mp->min_address + mp->fix_size;
16315
16316 if (mp->refcount)
16317 mp->next->offset = mp->offset + mp->fix_size;
16318 else
16319 mp->next->offset = mp->offset;
16320
16321 mp = mp->next;
16322 }
16323
16324 return min_mp;
16325 }
16326
16327 static void
16328 assign_minipool_offsets (Mfix *barrier)
16329 {
16330 HOST_WIDE_INT offset = 0;
16331 Mnode *mp;
16332
16333 minipool_barrier = barrier;
16334
16335 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16336 {
16337 mp->offset = offset;
16338
16339 if (mp->refcount > 0)
16340 offset += mp->fix_size;
16341 }
16342 }
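
/* For example, a pool containing live entries of 4, 8 and 4 bytes is
   assigned the offsets 0, 4 and 12; an entry whose refcount has dropped to
   zero still gets an offset but contributes no space.  */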
16343
16344 /* Output the literal table.  */
16345 static void
16346 dump_minipool (rtx_insn *scan)
16347 {
16348 Mnode * mp;
16349 Mnode * nmp;
16350 int align64 = 0;
16351
16352 if (ARM_DOUBLEWORD_ALIGN)
16353 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16354 if (mp->refcount > 0 && mp->fix_size >= 8)
16355 {
16356 align64 = 1;
16357 break;
16358 }
16359
16360 if (dump_file)
16361 fprintf (dump_file,
16362 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16363 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16364
16365 scan = emit_label_after (gen_label_rtx (), scan);
16366 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16367 scan = emit_label_after (minipool_vector_label, scan);
16368
16369 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16370 {
16371 if (mp->refcount > 0)
16372 {
16373 if (dump_file)
16374 {
16375 fprintf (dump_file,
16376 ";; Offset %u, min %ld, max %ld ",
16377 (unsigned) mp->offset, (unsigned long) mp->min_address,
16378 (unsigned long) mp->max_address);
16379 arm_print_value (dump_file, mp->value);
16380 fputc ('\n', dump_file);
16381 }
16382
16383 rtx val = copy_rtx (mp->value);
16384
16385 switch (GET_MODE_SIZE (mp->mode))
16386 {
16387 #ifdef HAVE_consttable_1
16388 case 1:
16389 scan = emit_insn_after (gen_consttable_1 (val), scan);
16390 break;
16391
16392 #endif
16393 #ifdef HAVE_consttable_2
16394 case 2:
16395 scan = emit_insn_after (gen_consttable_2 (val), scan);
16396 break;
16397
16398 #endif
16399 #ifdef HAVE_consttable_4
16400 case 4:
16401 scan = emit_insn_after (gen_consttable_4 (val), scan);
16402 break;
16403
16404 #endif
16405 #ifdef HAVE_consttable_8
16406 case 8:
16407 scan = emit_insn_after (gen_consttable_8 (val), scan);
16408 break;
16409
16410 #endif
16411 #ifdef HAVE_consttable_16
16412 case 16:
16413 scan = emit_insn_after (gen_consttable_16 (val), scan);
16414 break;
16415
16416 #endif
16417 default:
16418 gcc_unreachable ();
16419 }
16420 }
16421
16422 nmp = mp->next;
16423 free (mp);
16424 }
16425
16426 minipool_vector_head = minipool_vector_tail = NULL;
16427 scan = emit_insn_after (gen_consttable_end (), scan);
16428 scan = emit_barrier_after (scan);
16429 }
16430
16431 /* Return the cost of forcibly inserting a barrier after INSN. */
16432 static int
16433 arm_barrier_cost (rtx_insn *insn)
16434 {
16435 /* Basing the location of the pool on the loop depth is preferable,
16436 but at the moment, the basic block information seems to be
16437 corrupt by this stage of the compilation. */
16438 int base_cost = 50;
16439 rtx_insn *next = next_nonnote_insn (insn);
16440
16441 if (next != NULL && LABEL_P (next))
16442 base_cost -= 20;
16443
16444 switch (GET_CODE (insn))
16445 {
16446 case CODE_LABEL:
16447 /* It will always be better to place the table before the label, rather
16448 than after it. */
16449 return 50;
16450
16451 case INSN:
16452 case CALL_INSN:
16453 return base_cost;
16454
16455 case JUMP_INSN:
16456 return base_cost - 10;
16457
16458 default:
16459 return base_cost + 10;
16460 }
16461 }
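
/* Lower is better here: an ordinary insn costs 50, or 30 when the next
   insn is a label; a jump insn in that position costs 20, so the cheapest
   typical spot is immediately after a branch that precedes a label.  */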
16462
16463 /* Find the best place in the insn stream in the range
16464 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16465 Create the barrier by inserting a jump and add a new fix entry for
16466 it. */
16467 static Mfix *
16468 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16469 {
16470 HOST_WIDE_INT count = 0;
16471 rtx_barrier *barrier;
16472 rtx_insn *from = fix->insn;
16473 /* The instruction after which we will insert the jump. */
16474 rtx_insn *selected = NULL;
16475 int selected_cost;
16476 /* The address at which the jump instruction will be placed. */
16477 HOST_WIDE_INT selected_address;
16478 Mfix * new_fix;
16479 HOST_WIDE_INT max_count = max_address - fix->address;
16480 rtx_code_label *label = gen_label_rtx ();
16481
16482 selected_cost = arm_barrier_cost (from);
16483 selected_address = fix->address;
16484
16485 while (from && count < max_count)
16486 {
16487 rtx_jump_table_data *tmp;
16488 int new_cost;
16489
16490 /* This code shouldn't have been called if there was a natural barrier
16491 within range. */
16492 gcc_assert (!BARRIER_P (from));
16493
16494 /* Count the length of this insn. This must stay in sync with the
16495 code that pushes minipool fixes. */
16496 if (LABEL_P (from))
16497 count += get_label_padding (from);
16498 else
16499 count += get_attr_length (from);
16500
16501 /* If there is a jump table, add its length. */
16502 if (tablejump_p (from, NULL, &tmp))
16503 {
16504 count += get_jump_table_size (tmp);
16505
16506 /* Jump tables aren't in a basic block, so base the cost on
16507 the dispatch insn. If we select this location, we will
16508 still put the pool after the table. */
16509 new_cost = arm_barrier_cost (from);
16510
16511 if (count < max_count
16512 && (!selected || new_cost <= selected_cost))
16513 {
16514 selected = tmp;
16515 selected_cost = new_cost;
16516 selected_address = fix->address + count;
16517 }
16518
16519 /* Continue after the dispatch table. */
16520 from = NEXT_INSN (tmp);
16521 continue;
16522 }
16523
16524 new_cost = arm_barrier_cost (from);
16525
16526 if (count < max_count
16527 && (!selected || new_cost <= selected_cost))
16528 {
16529 selected = from;
16530 selected_cost = new_cost;
16531 selected_address = fix->address + count;
16532 }
16533
16534 from = NEXT_INSN (from);
16535 }
16536
16537 /* Make sure that we found a place to insert the jump. */
16538 gcc_assert (selected);
16539
16540 /* Create a new JUMP_INSN that branches around a barrier. */
16541 from = emit_jump_insn_after (gen_jump (label), selected);
16542 JUMP_LABEL (from) = label;
16543 barrier = emit_barrier_after (from);
16544 emit_label_after (label, barrier);
16545
16546 /* Create a minipool barrier entry for the new barrier. */
16547 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16548 new_fix->insn = barrier;
16549 new_fix->address = selected_address;
16550 new_fix->next = fix->next;
16551 fix->next = new_fix;
16552
16553 return new_fix;
16554 }
16555
16556 /* Record that there is a natural barrier in the insn stream at
16557 ADDRESS. */
16558 static void
16559 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16560 {
16561 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16562
16563 fix->insn = insn;
16564 fix->address = address;
16565
16566 fix->next = NULL;
16567 if (minipool_fix_head != NULL)
16568 minipool_fix_tail->next = fix;
16569 else
16570 minipool_fix_head = fix;
16571
16572 minipool_fix_tail = fix;
16573 }
16574
16575 /* Record INSN, which will need fixing up to load a value from the
16576 minipool. ADDRESS is the offset of the insn since the start of the
16577 function; LOC is a pointer to the part of the insn which requires
16578 fixing; VALUE is the constant that must be loaded, which is of type
16579 MODE. */
16580 static void
16581 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16582 machine_mode mode, rtx value)
16583 {
16584 gcc_assert (!arm_disable_literal_pool);
16585 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16586
16587 fix->insn = insn;
16588 fix->address = address;
16589 fix->loc = loc;
16590 fix->mode = mode;
16591 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16592 fix->value = value;
16593 fix->forwards = get_attr_pool_range (insn);
16594 fix->backwards = get_attr_neg_pool_range (insn);
16595 fix->minipool = NULL;
16596
16597 /* If an insn doesn't have a range defined for it, then it isn't
16598 expecting to be reworked by this code. Better to stop now than
16599 to generate duff assembly code. */
16600 gcc_assert (fix->forwards || fix->backwards);
16601
16602 /* If an entry requires 8-byte alignment then assume all constant pools
16603 require 4 bytes of padding. Trying to do this later on a per-pool
16604 basis is awkward because existing pool entries have to be modified. */
16605 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16606 minipool_pad = 4;
16607
16608 if (dump_file)
16609 {
16610 fprintf (dump_file,
16611 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16612 GET_MODE_NAME (mode),
16613 INSN_UID (insn), (unsigned long) address,
16614 -1 * (long)fix->backwards, (long)fix->forwards);
16615 arm_print_value (dump_file, fix->value);
16616 fprintf (dump_file, "\n");
16617 }
16618
16619 /* Add it to the chain of fixes. */
16620 fix->next = NULL;
16621
16622 if (minipool_fix_head != NULL)
16623 minipool_fix_tail->next = fix;
16624 else
16625 minipool_fix_head = fix;
16626
16627 minipool_fix_tail = fix;
16628 }
16629
16630 /* Return the maximum number of insns that we are prepared to use to
16631 synthesize a 64-bit constant inline; see arm_const_double_inline_cost
16632 for the cost of a particular value. */
16633 int
16634 arm_max_const_double_inline_cost ()
16635 {
16636 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16637 }
16638
16639 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16640 Returns the number of insns needed, or 99 if we don't know how to
16641 do it. */
16642 int
16643 arm_const_double_inline_cost (rtx val)
16644 {
16645 rtx lowpart, highpart;
16646 machine_mode mode;
16647
16648 mode = GET_MODE (val);
16649
16650 if (mode == VOIDmode)
16651 mode = DImode;
16652
16653 gcc_assert (GET_MODE_SIZE (mode) == 8);
16654
16655 lowpart = gen_lowpart (SImode, val);
16656 highpart = gen_highpart_mode (SImode, mode, val);
16657
16658 gcc_assert (CONST_INT_P (lowpart));
16659 gcc_assert (CONST_INT_P (highpart));
16660
16661 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16662 NULL_RTX, NULL_RTX, 0, 0)
16663 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16664 NULL_RTX, NULL_RTX, 0, 0));
16665 }
16666
16667 /* Cost of loading a SImode constant. */
16668 static inline int
16669 arm_const_inline_cost (enum rtx_code code, rtx val)
16670 {
16671 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16672 NULL_RTX, NULL_RTX, 1, 0);
16673 }
16674
16675 /* Return true if it is worthwhile to split a 64-bit constant into two
16676 32-bit operations. This is the case if optimizing for size, or
16677 if we have load delay slots, or if one 32-bit part can be done with
16678 a single data operation. */
16679 bool
16680 arm_const_double_by_parts (rtx val)
16681 {
16682 machine_mode mode = GET_MODE (val);
16683 rtx part;
16684
16685 if (optimize_size || arm_ld_sched)
16686 return true;
16687
16688 if (mode == VOIDmode)
16689 mode = DImode;
16690
16691 part = gen_highpart_mode (SImode, mode, val);
16692
16693 gcc_assert (CONST_INT_P (part));
16694
16695 if (const_ok_for_arm (INTVAL (part))
16696 || const_ok_for_arm (~INTVAL (part)))
16697 return true;
16698
16699 part = gen_lowpart (SImode, val);
16700
16701 gcc_assert (CONST_INT_P (part));
16702
16703 if (const_ok_for_arm (INTVAL (part))
16704 || const_ok_for_arm (~INTVAL (part)))
16705 return true;
16706
16707 return false;
16708 }
16709
16710 /* Return true if it is possible to inline both the high and low parts
16711 of a 64-bit constant into 32-bit data processing instructions. */
16712 bool
16713 arm_const_double_by_immediates (rtx val)
16714 {
16715 machine_mode mode = GET_MODE (val);
16716 rtx part;
16717
16718 if (mode == VOIDmode)
16719 mode = DImode;
16720
16721 part = gen_highpart_mode (SImode, mode, val);
16722
16723 gcc_assert (CONST_INT_P (part));
16724
16725 if (!const_ok_for_arm (INTVAL (part)))
16726 return false;
16727
16728 part = gen_lowpart (SImode, val);
16729
16730 gcc_assert (CONST_INT_P (part));
16731
16732 if (!const_ok_for_arm (INTVAL (part)))
16733 return false;
16734
16735 return true;
16736 }
16737
16738 /* Scan INSN and note any of its operands that need fixing.
16739 If DO_PUSHES is false we do not actually push any of the fixups
16740 needed. */
16741 static void
16742 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16743 {
16744 int opno;
16745
16746 extract_constrain_insn (insn);
16747
16748 if (recog_data.n_alternatives == 0)
16749 return;
16750
16751 /* Fill in recog_op_alt with information about the constraints of
16752 this insn. */
16753 preprocess_constraints (insn);
16754
16755 const operand_alternative *op_alt = which_op_alt ();
16756 for (opno = 0; opno < recog_data.n_operands; opno++)
16757 {
16758 /* Things we need to fix can only occur in inputs. */
16759 if (recog_data.operand_type[opno] != OP_IN)
16760 continue;
16761
16762 /* If this alternative is a memory reference, then any mention
16763 of constants in this alternative is really to fool reload
16764 into allowing us to accept one there. We need to fix them up
16765 now so that we output the right code. */
16766 if (op_alt[opno].memory_ok)
16767 {
16768 rtx op = recog_data.operand[opno];
16769
16770 if (CONSTANT_P (op))
16771 {
16772 if (do_pushes)
16773 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16774 recog_data.operand_mode[opno], op);
16775 }
16776 else if (MEM_P (op)
16777 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16778 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16779 {
16780 if (do_pushes)
16781 {
16782 rtx cop = avoid_constant_pool_reference (op);
16783
16784 /* Casting the address of something to a mode narrower
16785 than a word can cause avoid_constant_pool_reference()
16786 to return the pool reference itself. That's no good to
16787 us here. Let's just hope that we can use the
16788 constant pool value directly. */
16789 if (op == cop)
16790 cop = get_pool_constant (XEXP (op, 0));
16791
16792 push_minipool_fix (insn, address,
16793 recog_data.operand_loc[opno],
16794 recog_data.operand_mode[opno], cop);
16795 }
16796
16797 }
16798 }
16799 }
16800
16801 return;
16802 }
16803
16804 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16805 and unions in the context of ARMv8-M Security Extensions. It is used as a
16806 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16807 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16808 or four masks, depending on whether it is being computed for a
16809 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16810 respectively. The tree for the type of the argument or a field within an
16811 argument is passed in ARG_TYPE, the current register this argument or field
16812 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16813 argument or field starts at is passed in STARTING_BIT and the last used bit
16814 is kept in LAST_USED_BIT which is also updated accordingly. */
16815
16816 static unsigned HOST_WIDE_INT
16817 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16818 uint32_t * padding_bits_to_clear,
16819 unsigned starting_bit, int * last_used_bit)
16820
16821 {
16822 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16823
16824 if (TREE_CODE (arg_type) == RECORD_TYPE)
16825 {
16826 unsigned current_bit = starting_bit;
16827 tree field;
16828 long int offset, size;
16829
16830
16831 field = TYPE_FIELDS (arg_type);
16832 while (field)
16833 {
16834 /* The offset within a structure is always an offset from
16835 the start of that structure. Make sure we take that into the
16836 calculation of the register based offset that we use here. */
16837 offset = starting_bit;
16838 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16839 offset %= 32;
16840
16841 /* This is the actual size of the field, for bitfields this is the
16842 bitfield width and not the container size. */
16843 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16844
16845 if (*last_used_bit != offset)
16846 {
16847 if (offset < *last_used_bit)
16848 {
16849 /* This field's offset is before the 'last_used_bit', that
16850 means this field goes on the next register. So we need to
16851 pad the rest of the current register and increase the
16852 register number. */
16853 uint32_t mask;
16854 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16855 mask++;
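              /* E.g. with *last_used_bit == 24 this gives
                 mask == 0xff000000: bits 24..31 of the current register
                 are padding and must be cleared.  */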
16856
16857 padding_bits_to_clear[*regno] |= mask;
16858 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16859 (*regno)++;
16860 }
16861 else
16862 {
16863 /* Otherwise we pad the bits between the last field's end and
16864 the start of the new field. */
16865 uint32_t mask;
16866
16867 mask = ((uint32_t)-1) >> (32 - offset);
16868 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
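              /* E.g. with offset == 16 and *last_used_bit == 8 this gives
                 mask == 0x0000ff00: only the gap between the previous
                 field's end and this field's start is treated as
                 padding.  */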
16869 padding_bits_to_clear[*regno] |= mask;
16870 }
16871 current_bit = offset;
16872 }
16873
16874 /* Calculate further padding bits for inner structs/unions too. */
16875 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16876 {
16877 *last_used_bit = current_bit;
16878 not_to_clear_reg_mask
16879 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16880 padding_bits_to_clear, offset,
16881 last_used_bit);
16882 }
16883 else
16884 {
16885 /* Update 'current_bit' with this field's size. If the
16886 'current_bit' lies in a subsequent register, update 'regno' and
16887 reset 'current_bit' to point to the current bit in that new
16888 register. */
16889 current_bit += size;
16890 while (current_bit >= 32)
16891 {
16892 current_bit-=32;
16893 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16894 (*regno)++;
16895 }
16896 *last_used_bit = current_bit;
16897 }
16898
16899 field = TREE_CHAIN (field);
16900 }
16901 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16902 }
16903 else if (TREE_CODE (arg_type) == UNION_TYPE)
16904 {
16905 tree field, field_t;
16906 int i, regno_t, field_size;
16907 int max_reg = -1;
16908 int max_bit = -1;
16909 uint32_t mask;
16910 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16911 = {-1, -1, -1, -1};
16912
16913 /* To compute the padding bits in a union we only consider bits as
16914 padding bits if, for every field in the union, they are either padding
16915 bits of that field or fall outside that field's size. */
16916 field = TYPE_FIELDS (arg_type);
16917 while (field)
16918 {
16919 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16920 = {0U, 0U, 0U, 0U};
16921 int last_used_bit_t = *last_used_bit;
16922 regno_t = *regno;
16923 field_t = TREE_TYPE (field);
16924
16925 /* If the field's type is either a record or a union make sure to
16926 compute their padding bits too. */
16927 if (RECORD_OR_UNION_TYPE_P (field_t))
16928 not_to_clear_reg_mask
16929 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16930 &padding_bits_to_clear_t[0],
16931 starting_bit, &last_used_bit_t);
16932 else
16933 {
16934 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16935 regno_t = (field_size / 32) + *regno;
16936 last_used_bit_t = (starting_bit + field_size) % 32;
16937 }
16938
16939 for (i = *regno; i < regno_t; i++)
16940 {
16941 /* For all but the last register used by this field only keep the
16942 padding bits that were padding bits in this field. */
16943 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16944 }
16945
16946 /* For the last register, keep all padding bits that were padding
16947 bits in this field and any padding bits that are still valid
16948 as padding bits but fall outside of this field's size. */
16949 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16950 padding_bits_to_clear_res[regno_t]
16951 &= padding_bits_to_clear_t[regno_t] | mask;
16952
16953 /* Update the maximum size of the fields in terms of registers used
16954 ('max_reg') and the 'last_used_bit' in said register. */
16955 if (max_reg < regno_t)
16956 {
16957 max_reg = regno_t;
16958 max_bit = last_used_bit_t;
16959 }
16960 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16961 max_bit = last_used_bit_t;
16962
16963 field = TREE_CHAIN (field);
16964 }
16965
16966 /* Update the current padding_bits_to_clear using the intersection of the
16967 padding bits of all the fields. */
16968 for (i=*regno; i < max_reg; i++)
16969 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16970
16971 /* Do not keep trailing padding bits, we do not know yet whether this
16972 is the end of the argument. */
16973 mask = ((uint32_t) 1 << max_bit) - 1;
16974 padding_bits_to_clear[max_reg]
16975 |= padding_bits_to_clear_res[max_reg] & mask;
16976
16977 *regno = max_reg;
16978 *last_used_bit = max_bit;
16979 }
16980 else
16981 /* This function should only be used for structs and unions. */
16982 gcc_unreachable ();
16983
16984 return not_to_clear_reg_mask;
16985 }
16986
16987 /* In the context of ARMv8-M Security Extensions, this function is used for both
16988 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16989 registers are used when returning or passing arguments, which is then
16990 returned as a mask. It will also compute a mask to indicate padding/unused
16991 bits for each of these registers, and passes this through the
16992 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16993 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16994 the starting register used to pass this argument or return value is passed
16995 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16996 for struct and union types. */
16997
16998 static unsigned HOST_WIDE_INT
16999 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17000 uint32_t * padding_bits_to_clear)
17001
17002 {
17003 int last_used_bit = 0;
17004 unsigned HOST_WIDE_INT not_to_clear_mask;
17005
17006 if (RECORD_OR_UNION_TYPE_P (arg_type))
17007 {
17008 not_to_clear_mask
17009 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17010 padding_bits_to_clear, 0,
17011 &last_used_bit);
17012
17013
17014 /* If the 'last_used_bit' is not zero, that means we are still using a
17015 part of the last 'regno'. In such cases we must clear the trailing
17016 bits. Otherwise we are not using regno and we should mark it as to
17017 clear. */
17018 if (last_used_bit != 0)
17019 padding_bits_to_clear[regno]
17020 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17021 else
17022 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17023 }
17024 else
17025 {
17026 not_to_clear_mask = 0;
17027 /* We are not dealing with structs or unions, so these arguments may be
17028 passed in floating point registers too. In some cases a BLKmode is
17029 used when returning or passing arguments in multiple VFP registers. */
17030 if (GET_MODE (arg_rtx) == BLKmode)
17031 {
17032 int i, arg_regs;
17033 rtx reg;
17034
17035 /* This should really only occur when dealing with the hard-float
17036 ABI. */
17037 gcc_assert (TARGET_HARD_FLOAT_ABI);
17038
17039 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17040 {
17041 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17042 gcc_assert (REG_P (reg));
17043
17044 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17045
17046 /* If we are dealing with DF mode, make sure we don't
17047 clear either of the registers it addresses. */
17048 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17049 if (arg_regs > 1)
17050 {
17051 unsigned HOST_WIDE_INT mask;
17052 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17053 mask -= HOST_WIDE_INT_1U << REGNO (reg);
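              /* This sets bits REGNO (reg) .. REGNO (reg) + arg_regs - 1;
                 e.g. for a DFmode value (arg_regs == 2) both halves of the
                 double register are protected from clearing.  */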
17054 not_to_clear_mask |= mask;
17055 }
17056 }
17057 }
17058 else
17059 {
17060 /* Otherwise we can rely on the MODE to determine how many registers
17061 are being used by this argument. */
17062 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17063 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17064 if (arg_regs > 1)
17065 {
17066 unsigned HOST_WIDE_INT
17067 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17068 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17069 not_to_clear_mask |= mask;
17070 }
17071 }
17072 }
17073
17074 return not_to_clear_mask;
17075 }
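
/* For instance, a plain 64-bit integer argument starting in r0 yields a
   mask with bits 0 and 1 set and leaves PADDING_BITS_TO_CLEAR untouched,
   so the caller removes r0 and r1 from the set of registers to be cleared
   before the non-secure call.  */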
17076
17077 /* Clear secret register values before doing a cmse_nonsecure_call or returning from
17078 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17079 are to be fully cleared, using the value in register CLEARING_REG if more
17080 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17081 the bits that need to be cleared in caller-saved core registers, with
17082 SCRATCH_REG used as a scratch register for that clearing.
17083
17084 NOTE: one of the three following conditions must hold:
17085 - SCRATCH_REG is a low register
17086 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17087 in TO_CLEAR_BITMAP)
17088 - CLEARING_REG is a low register. */
17089
17090 static void
17091 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17092 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17093 {
17094 bool saved_clearing = false;
17095 rtx saved_clearing_reg = NULL_RTX;
17096 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17097
17098 gcc_assert (arm_arch_cmse);
17099
17100 if (!bitmap_empty_p (to_clear_bitmap))
17101 {
17102 minregno = bitmap_first_set_bit (to_clear_bitmap);
17103 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17104 }
17105 clearing_regno = REGNO (clearing_reg);
17106
17107 /* Clear padding bits. */
17108 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17109 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17110 {
17111 uint64_t mask;
17112 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17113
17114 if (padding_bits_to_clear[i] == 0)
17115 continue;
17116
17117 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17118 CLEARING_REG as scratch. */
17119 if (TARGET_THUMB1
17120 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17121 {
17122 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17123 such that we can use clearing_reg to clear the unused bits in the
17124 arguments. */
17125 if ((clearing_regno > maxregno
17126 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17127 && !saved_clearing)
17128 {
17129 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17130 emit_move_insn (scratch_reg, clearing_reg);
17131 saved_clearing = true;
17132 saved_clearing_reg = scratch_reg;
17133 }
17134 scratch_reg = clearing_reg;
17135 }
17136
17137 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17138 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17139 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17140
17141 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17142 mask = (~padding_bits_to_clear[i]) >> 16;
17143 rtx16 = gen_int_mode (16, SImode);
17144 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17145 if (mask)
17146 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17147
17148 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
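          /* E.g. for padding_bits_to_clear[i] == 0xff000000 the scratch
             register is built up as 0x0000ffff and then 0x00ffffff (the
             zero_extract is effectively a movt), and the AND clears bits
             24..31 of the argument register while preserving the rest.  */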
17149 }
17150 if (saved_clearing)
17151 emit_move_insn (clearing_reg, saved_clearing_reg);
17152
17153
17154 /* Clear full registers. */
17155
17156 /* If not marked for clearing, clearing_reg already does not contain
17157 any secret. */
17158 if (clearing_regno <= maxregno
17159 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17160 {
17161 emit_move_insn (clearing_reg, const0_rtx);
17162 emit_use (clearing_reg);
17163 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17164 }
17165
17166 for (regno = minregno; regno <= maxregno; regno++)
17167 {
17168 if (!bitmap_bit_p (to_clear_bitmap, regno))
17169 continue;
17170
17171 if (IS_VFP_REGNUM (regno))
17172 {
17173 /* If regno is an even vfp register and its successor is also to
17174 be cleared, use vmov. */
17175 if (TARGET_VFP_DOUBLE
17176 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17177 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17178 {
17179 emit_move_insn (gen_rtx_REG (DFmode, regno),
17180 CONST1_RTX (DFmode));
17181 emit_use (gen_rtx_REG (DFmode, regno));
17182 regno++;
17183 }
17184 else
17185 {
17186 emit_move_insn (gen_rtx_REG (SFmode, regno),
17187 CONST1_RTX (SFmode));
17188 emit_use (gen_rtx_REG (SFmode, regno));
17189 }
17190 }
17191 else
17192 {
17193 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17194 emit_use (gen_rtx_REG (SImode, regno));
17195 }
17196 }
17197 }
17198
17199 /* Clear caller-saved registers not used to pass arguments before a
17200 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17201 registers is done in __gnu_cmse_nonsecure_call libcall.
17202 See libgcc/config/arm/cmse_nonsecure_call.S. */
17203
17204 static void
17205 cmse_nonsecure_call_clear_caller_saved (void)
17206 {
17207 basic_block bb;
17208
17209 FOR_EACH_BB_FN (bb, cfun)
17210 {
17211 rtx_insn *insn;
17212
17213 FOR_BB_INSNS (bb, insn)
17214 {
17215 unsigned address_regnum, regno, maxregno =
17216 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17217 auto_sbitmap to_clear_bitmap (maxregno + 1);
17218 rtx_insn *seq;
17219 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17220 rtx address;
17221 CUMULATIVE_ARGS args_so_far_v;
17222 cumulative_args_t args_so_far;
17223 tree arg_type, fntype;
17224 bool first_param = true;
17225 function_args_iterator args_iter;
17226 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17227
17228 if (!NONDEBUG_INSN_P (insn))
17229 continue;
17230
17231 if (!CALL_P (insn))
17232 continue;
17233
17234 pat = PATTERN (insn);
17235 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17236 call = XVECEXP (pat, 0, 0);
17237
17238 /* Get the real call RTX if the insn sets a value, ie. returns. */
17239 if (GET_CODE (call) == SET)
17240 call = SET_SRC (call);
17241
17242 /* Check if it is a cmse_nonsecure_call. */
17243 unspec = XEXP (call, 0);
17244 if (GET_CODE (unspec) != UNSPEC
17245 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17246 continue;
17247
17248 /* Determine the caller-saved registers we need to clear. */
17249 bitmap_clear (to_clear_bitmap);
17250 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17251
17252 /* Only look at the caller-saved floating point registers in case of
17253 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17254 lazy store and loads which clear both caller- and callee-saved
17255 registers. */
17256 if (TARGET_HARD_FLOAT_ABI)
17257 {
17258 auto_sbitmap float_bitmap (maxregno + 1);
17259
17260 bitmap_clear (float_bitmap);
17261 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17262 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17263 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17264 }
17265
17266 /* Make sure the register used to hold the function address is not
17267 cleared. */
17268 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17269 gcc_assert (MEM_P (address));
17270 gcc_assert (REG_P (XEXP (address, 0)));
17271 address_regnum = REGNO (XEXP (address, 0));
17272 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17273 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17274
17275 /* Set basic block of call insn so that df rescan is performed on
17276 insns inserted here. */
17277 set_block_for_insn (insn, bb);
17278 df_set_flags (DF_DEFER_INSN_RESCAN);
17279 start_sequence ();
17280
17281 /* Make sure the scheduler doesn't schedule other insns beyond
17282 here. */
17283 emit_insn (gen_blockage ());
17284
17285 /* Walk through all arguments and clear registers appropriately.  */
17287 fntype = TREE_TYPE (MEM_EXPR (address));
17288 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17289 NULL_TREE);
17290 args_so_far = pack_cumulative_args (&args_so_far_v);
17291 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17292 {
17293 rtx arg_rtx;
17294 uint64_t to_clear_args_mask;
17295 machine_mode arg_mode = TYPE_MODE (arg_type);
17296
17297 if (VOID_TYPE_P (arg_type))
17298 continue;
17299
17300 if (!first_param)
17301 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17302 true);
17303
17304 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17305 true);
17306 gcc_assert (REG_P (arg_rtx));
17307 to_clear_args_mask
17308 = compute_not_to_clear_mask (arg_type, arg_rtx,
17309 REGNO (arg_rtx),
17310 &padding_bits_to_clear[0]);
17311 if (to_clear_args_mask)
17312 {
17313 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17314 {
17315 if (to_clear_args_mask & (1ULL << regno))
17316 bitmap_clear_bit (to_clear_bitmap, regno);
17317 }
17318 }
17319
17320 first_param = false;
17321 }
17322
17323 /* We use right shift and left shift to clear the LSB of the address
17324 we jump to instead of using bic, to avoid having to use an extra
17325 register on Thumb-1. */
17326 clearing_reg = XEXP (address, 0);
17327 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17328 emit_insn (gen_rtx_SET (clearing_reg, shift));
17329 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17330 emit_insn (gen_rtx_SET (clearing_reg, shift));
17331
17332 /* Clear caller-saved registers that leak before doing a non-secure
17333 call. */
17334 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17335 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17336 NUM_ARG_REGS, ip_reg, clearing_reg);
17337
17338 seq = get_insns ();
17339 end_sequence ();
17340 emit_insn_before (seq, insn);
17341 }
17342 }
17343 }
17344
17345 /* Rewrite move insn into subtract of 0 if the condition codes will
17346 be useful in next conditional jump insn. */
17347
17348 static void
17349 thumb1_reorg (void)
17350 {
17351 basic_block bb;
17352
17353 FOR_EACH_BB_FN (bb, cfun)
17354 {
17355 rtx dest, src;
17356 rtx cmp, op0, op1, set = NULL;
17357 rtx_insn *prev, *insn = BB_END (bb);
17358 bool insn_clobbered = false;
17359
17360 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17361 insn = PREV_INSN (insn);
17362
17363 /* Find the last cbranchsi4_insn in basic block BB. */
17364 if (insn == BB_HEAD (bb)
17365 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17366 continue;
17367
17368 /* Get the register with which we are comparing. */
17369 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17370 op0 = XEXP (cmp, 0);
17371 op1 = XEXP (cmp, 1);
17372
17373 /* Check that comparison is against ZERO. */
17374 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17375 continue;
17376
17377 /* Find the first flag setting insn before INSN in basic block BB. */
17378 gcc_assert (insn != BB_HEAD (bb));
17379 for (prev = PREV_INSN (insn);
17380 (!insn_clobbered
17381 && prev != BB_HEAD (bb)
17382 && (NOTE_P (prev)
17383 || DEBUG_INSN_P (prev)
17384 || ((set = single_set (prev)) != NULL
17385 && get_attr_conds (prev) == CONDS_NOCOND)));
17386 prev = PREV_INSN (prev))
17387 {
17388 if (reg_set_p (op0, prev))
17389 insn_clobbered = true;
17390 }
17391
17392 /* Skip if op0 is clobbered by insn other than prev. */
17393 if (insn_clobbered)
17394 continue;
17395
17396 if (!set)
17397 continue;
17398
17399 dest = SET_DEST (set);
17400 src = SET_SRC (set);
17401 if (!low_register_operand (dest, SImode)
17402 || !low_register_operand (src, SImode))
17403 continue;
17404
17405 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17406 in INSN. Both src and dest of the move insn are checked. */
17407 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17408 {
17409 dest = copy_rtx (dest);
17410 src = copy_rtx (src);
17411 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17412 PATTERN (prev) = gen_rtx_SET (dest, src);
17413 INSN_CODE (prev) = -1;
17414 /* Set test register in INSN to dest. */
17415 XEXP (cmp, 0) = copy_rtx (dest);
17416 INSN_CODE (insn) = -1;
17417 }
17418 }
17419 }
17420
17421 /* Convert instructions to their cc-clobbering variant if possible, since
17422 that allows us to use smaller encodings. */
17423
17424 static void
17425 thumb2_reorg (void)
17426 {
17427 basic_block bb;
17428 regset_head live;
17429
17430 INIT_REG_SET (&live);
17431
17432 /* We are freeing block_for_insn in the toplev to keep compatibility
17433 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17434 compute_bb_for_insn ();
17435 df_analyze ();
17436
17437 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17438
17439 FOR_EACH_BB_FN (bb, cfun)
17440 {
17441 if ((current_tune->disparage_flag_setting_t16_encodings
17442 == tune_params::DISPARAGE_FLAGS_ALL)
17443 && optimize_bb_for_speed_p (bb))
17444 continue;
17445
17446 rtx_insn *insn;
17447 Convert_Action action = SKIP;
17448 Convert_Action action_for_partial_flag_setting
17449 = ((current_tune->disparage_flag_setting_t16_encodings
17450 != tune_params::DISPARAGE_FLAGS_NEITHER)
17451 && optimize_bb_for_speed_p (bb))
17452 ? SKIP : CONV;
17453
17454 COPY_REG_SET (&live, DF_LR_OUT (bb));
17455 df_simulate_initialize_backwards (bb, &live);
17456 FOR_BB_INSNS_REVERSE (bb, insn)
17457 {
17458 if (NONJUMP_INSN_P (insn)
17459 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17460 && GET_CODE (PATTERN (insn)) == SET)
17461 {
17462 action = SKIP;
17463 rtx pat = PATTERN (insn);
17464 rtx dst = XEXP (pat, 0);
17465 rtx src = XEXP (pat, 1);
17466 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17467
17468 if (UNARY_P (src) || BINARY_P (src))
17469 op0 = XEXP (src, 0);
17470
17471 if (BINARY_P (src))
17472 op1 = XEXP (src, 1);
17473
17474 if (low_register_operand (dst, SImode))
17475 {
17476 switch (GET_CODE (src))
17477 {
17478 case PLUS:
17479 /* Adding two registers and storing the result
17480 in the first source is already a 16-bit
17481 operation. */
17482 if (rtx_equal_p (dst, op0)
17483 && register_operand (op1, SImode))
17484 break;
17485
17486 if (low_register_operand (op0, SImode))
17487 {
17488 /* ADDS <Rd>,<Rn>,<Rm> */
17489 if (low_register_operand (op1, SImode))
17490 action = CONV;
17491 /* ADDS <Rdn>,#<imm8> */
17492 /* SUBS <Rdn>,#<imm8> */
17493 else if (rtx_equal_p (dst, op0)
17494 && CONST_INT_P (op1)
17495 && IN_RANGE (INTVAL (op1), -255, 255))
17496 action = CONV;
17497 /* ADDS <Rd>,<Rn>,#<imm3> */
17498 /* SUBS <Rd>,<Rn>,#<imm3> */
17499 else if (CONST_INT_P (op1)
17500 && IN_RANGE (INTVAL (op1), -7, 7))
17501 action = CONV;
17502 }
17503 /* ADCS <Rd>, <Rn> */
17504 else if (GET_CODE (XEXP (src, 0)) == PLUS
17505 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17506 && low_register_operand (XEXP (XEXP (src, 0), 1),
17507 SImode)
17508 && COMPARISON_P (op1)
17509 && cc_register (XEXP (op1, 0), VOIDmode)
17510 && maybe_get_arm_condition_code (op1) == ARM_CS
17511 && XEXP (op1, 1) == const0_rtx)
17512 action = CONV;
17513 break;
17514
17515 case MINUS:
17516 /* RSBS <Rd>,<Rn>,#0
17517 Not handled here: see NEG below. */
17518 /* SUBS <Rd>,<Rn>,#<imm3>
17519 SUBS <Rdn>,#<imm8>
17520 Not handled here: see PLUS above. */
17521 /* SUBS <Rd>,<Rn>,<Rm> */
17522 if (low_register_operand (op0, SImode)
17523 && low_register_operand (op1, SImode))
17524 action = CONV;
17525 break;
17526
17527 case MULT:
17528 /* MULS <Rdm>,<Rn>,<Rdm>
17529 As an exception to the rule, this is only used
17530 when optimizing for size since MULS is slow on all
17531 known implementations. We do not even want to use
17532 MULS in cold code, if optimizing for speed, so we
17533 test the global flag here. */
17534 if (!optimize_size)
17535 break;
17536 /* Fall through. */
17537 case AND:
17538 case IOR:
17539 case XOR:
17540 /* ANDS <Rdn>,<Rm> */
17541 if (rtx_equal_p (dst, op0)
17542 && low_register_operand (op1, SImode))
17543 action = action_for_partial_flag_setting;
17544 else if (rtx_equal_p (dst, op1)
17545 && low_register_operand (op0, SImode))
17546 action = action_for_partial_flag_setting == SKIP
17547 ? SKIP : SWAP_CONV;
17548 break;
17549
17550 case ASHIFTRT:
17551 case ASHIFT:
17552 case LSHIFTRT:
17553 /* ASRS <Rdn>,<Rm> */
17554 /* LSRS <Rdn>,<Rm> */
17555 /* LSLS <Rdn>,<Rm> */
17556 if (rtx_equal_p (dst, op0)
17557 && low_register_operand (op1, SImode))
17558 action = action_for_partial_flag_setting;
17559 /* ASRS <Rd>,<Rm>,#<imm5> */
17560 /* LSRS <Rd>,<Rm>,#<imm5> */
17561 /* LSLS <Rd>,<Rm>,#<imm5> */
17562 else if (low_register_operand (op0, SImode)
17563 && CONST_INT_P (op1)
17564 && IN_RANGE (INTVAL (op1), 0, 31))
17565 action = action_for_partial_flag_setting;
17566 break;
17567
17568 case ROTATERT:
17569 /* RORS <Rdn>,<Rm> */
17570 if (rtx_equal_p (dst, op0)
17571 && low_register_operand (op1, SImode))
17572 action = action_for_partial_flag_setting;
17573 break;
17574
17575 case NOT:
17576 /* MVNS <Rd>,<Rm> */
17577 if (low_register_operand (op0, SImode))
17578 action = action_for_partial_flag_setting;
17579 break;
17580
17581 case NEG:
17582 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17583 if (low_register_operand (op0, SImode))
17584 action = CONV;
17585 break;
17586
17587 case CONST_INT:
17588 /* MOVS <Rd>,#<imm8> */
17589 if (CONST_INT_P (src)
17590 && IN_RANGE (INTVAL (src), 0, 255))
17591 action = action_for_partial_flag_setting;
17592 break;
17593
17594 case REG:
17595 /* MOVS and MOV<c> with registers have different
17596 encodings, so are not relevant here. */
17597 break;
17598
17599 default:
17600 break;
17601 }
17602 }
17603
17604 if (action != SKIP)
17605 {
17606 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17607 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17608 rtvec vec;
17609
17610 if (action == SWAP_CONV)
17611 {
17612 src = copy_rtx (src);
17613 XEXP (src, 0) = op1;
17614 XEXP (src, 1) = op0;
17615 pat = gen_rtx_SET (dst, src);
17616 vec = gen_rtvec (2, pat, clobber);
17617 }
17618 else /* action == CONV */
17619 vec = gen_rtvec (2, pat, clobber);
17620
17621 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17622 INSN_CODE (insn) = -1;
17623 }
17624 }
17625
17626 if (NONDEBUG_INSN_P (insn))
17627 df_simulate_one_insn_backwards (bb, insn, &live);
17628 }
17629 }
17630
17631 CLEAR_REG_SET (&live);
17632 }
17633
17634 /* Gcc puts the pool in the wrong place for ARM, since we can only
17635 load addresses a limited distance around the pc. We do some
17636 special munging to move the constant pool values to the correct
17637 point in the code. */
17638 static void
17639 arm_reorg (void)
17640 {
17641 rtx_insn *insn;
17642 HOST_WIDE_INT address = 0;
17643 Mfix * fix;
17644
17645 if (use_cmse)
17646 cmse_nonsecure_call_clear_caller_saved ();
17647 if (TARGET_THUMB1)
17648 thumb1_reorg ();
17649 else if (TARGET_THUMB2)
17650 thumb2_reorg ();
17651
17652 /* Ensure all insns that must be split have been split at this point.
17653 Otherwise, the pool placement code below may compute incorrect
17654 insn lengths. Note that when optimizing, all insns have already
17655 been split at this point. */
17656 if (!optimize)
17657 split_all_insns_noflow ();
17658
17659 /* Make sure we do not attempt to create a literal pool even though it should
17660 no longer be necessary to create any. */
17661 if (arm_disable_literal_pool)
17662 return;
17663
17664 minipool_fix_head = minipool_fix_tail = NULL;
17665
17666 /* The first insn must always be a note, or the code below won't
17667 scan it properly. */
17668 insn = get_insns ();
17669 gcc_assert (NOTE_P (insn));
17670 minipool_pad = 0;
17671
17672 /* Scan all the insns and record the operands that will need fixing. */
17673 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17674 {
17675 if (BARRIER_P (insn))
17676 push_minipool_barrier (insn, address);
17677 else if (INSN_P (insn))
17678 {
17679 rtx_jump_table_data *table;
17680
17681 note_invalid_constants (insn, address, true);
17682 address += get_attr_length (insn);
17683
17684 /* If the insn is a vector jump, add the size of the table
17685 and skip the table. */
17686 if (tablejump_p (insn, NULL, &table))
17687 {
17688 address += get_jump_table_size (table);
17689 insn = table;
17690 }
17691 }
17692 else if (LABEL_P (insn))
17693 /* Add the worst-case padding due to alignment. We don't add
17694 the _current_ padding because the minipool insertions
17695 themselves might change it. */
17696 address += get_label_padding (insn);
17697 }
17698
17699 fix = minipool_fix_head;
17700
17701 /* Now scan the fixups and perform the required changes. */
17702 while (fix)
17703 {
17704 Mfix * ftmp;
17705 Mfix * fdel;
17706 Mfix * last_added_fix;
17707 Mfix * last_barrier = NULL;
17708 Mfix * this_fix;
17709
17710 /* Skip any further barriers before the next fix. */
17711 while (fix && BARRIER_P (fix->insn))
17712 fix = fix->next;
17713
17714 /* No more fixes. */
17715 if (fix == NULL)
17716 break;
17717
17718 last_added_fix = NULL;
17719
17720 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17721 {
17722 if (BARRIER_P (ftmp->insn))
17723 {
17724 if (ftmp->address >= minipool_vector_head->max_address)
17725 break;
17726
17727 last_barrier = ftmp;
17728 }
17729 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17730 break;
17731
17732 last_added_fix = ftmp; /* Keep track of the last fix added. */
17733 }
17734
17735 /* If we found a barrier, drop back to that; any fixes that we
17736 could have reached but come after the barrier will now go in
17737 the next mini-pool. */
17738 if (last_barrier != NULL)
17739 {
17740 /* Reduce the refcount for those fixes that won't go into this
17741 pool after all. */
17742 for (fdel = last_barrier->next;
17743 fdel && fdel != ftmp;
17744 fdel = fdel->next)
17745 {
17746 fdel->minipool->refcount--;
17747 fdel->minipool = NULL;
17748 }
17749
17750 ftmp = last_barrier;
17751 }
17752 else
17753 {
17754 /* ftmp is the first fix that we can't fit into this pool and
17755 there are no natural barriers that we could use. Insert a
17756 new barrier in the code somewhere between the previous
17757 fix and this one, and arrange to jump around it. */
17758 HOST_WIDE_INT max_address;
17759
17760 /* The last item on the list of fixes must be a barrier, so
17761 we can never run off the end of the list of fixes without
17762 last_barrier being set. */
17763 gcc_assert (ftmp);
17764
17765 max_address = minipool_vector_head->max_address;
17766 /* Check that there isn't another fix that is in range that
17767 we couldn't fit into this pool because the pool was
17768 already too large: we need to put the pool before such an
17769 instruction. The pool itself may come just after the
17770 fix because create_fix_barrier also allows space for a
17771 jump instruction. */
17772 if (ftmp->address < max_address)
17773 max_address = ftmp->address + 1;
17774
17775 last_barrier = create_fix_barrier (last_added_fix, max_address);
17776 }
17777
17778 assign_minipool_offsets (last_barrier);
17779
17780 while (ftmp)
17781 {
17782 if (!BARRIER_P (ftmp->insn)
17783 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17784 == NULL))
17785 break;
17786
17787 ftmp = ftmp->next;
17788 }
17789
17790 /* Scan over the fixes we have identified for this pool, fixing them
17791 up and adding the constants to the pool itself. */
17792 for (this_fix = fix; this_fix && ftmp != this_fix;
17793 this_fix = this_fix->next)
17794 if (!BARRIER_P (this_fix->insn))
17795 {
17796 rtx addr
17797 = plus_constant (Pmode,
17798 gen_rtx_LABEL_REF (VOIDmode,
17799 minipool_vector_label),
17800 this_fix->minipool->offset);
17801 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17802 }
17803
17804 dump_minipool (last_barrier->insn);
17805 fix = ftmp;
17806 }
17807
17808 /* From now on we must synthesize any constants that we can't handle
17809 directly. This can happen if the RTL gets split during final
17810 instruction generation. */
17811 cfun->machine->after_arm_reorg = 1;
17812
17813 /* Free the minipool memory. */
17814 obstack_free (&minipool_obstack, minipool_startobj);
17815 }
17816 \f
17817 /* Routines to output assembly language. */
17818
17819 /* Return the string representation of the real value R. */
17820 static const char *
17821 fp_const_from_val (REAL_VALUE_TYPE *r)
17822 {
17823 if (!fp_consts_inited)
17824 init_fp_table ();
17825
17826 gcc_assert (real_equal (r, &value_fp0));
17827 return "0";
17828 }
17829
17830 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17831 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17832 is in the list, and UPDATE is true iff the list contains an explicit
17833 update of the base register. */
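/* Illustrative examples (not part of the original sources; register names
   are arbitrary): popping r4, r5 and the PC through SP with an update
   would be emitted as

       pop     {r4, r5, pc}

   while the same list inside an interrupt handler, where POP cannot be
   used for the exception return, comes out as

       ldmfd   sp!, {r4, r5, pc}^  */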
17834 void
17835 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17836 bool update)
17837 {
17838 int i;
17839 char pattern[100];
17840 int offset;
17841 const char *conditional;
17842 int num_saves = XVECLEN (operands[0], 0);
17843 unsigned int regno;
17844 unsigned int regno_base = REGNO (operands[1]);
17845 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17846
17847 offset = 0;
17848 offset += update ? 1 : 0;
17849 offset += return_pc ? 1 : 0;
17850
17851 /* Is the base register in the list? */
17852 for (i = offset; i < num_saves; i++)
17853 {
17854 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17855 /* If SP is in the list, then the base register must be SP. */
17856 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17857 /* If base register is in the list, there must be no explicit update. */
17858 if (regno == regno_base)
17859 gcc_assert (!update);
17860 }
17861
17862 conditional = reverse ? "%?%D0" : "%?%d0";
17863 /* Can't use POP if returning from an interrupt. */
17864 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17865 sprintf (pattern, "pop%s\t{", conditional);
17866 else
17867 {
17868 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17869 It's just a convention; their semantics are identical. */
17870 if (regno_base == SP_REGNUM)
17871 sprintf (pattern, "ldmfd%s\t", conditional);
17872 else if (update)
17873 sprintf (pattern, "ldmia%s\t", conditional);
17874 else
17875 sprintf (pattern, "ldm%s\t", conditional);
17876
17877 strcat (pattern, reg_names[regno_base]);
17878 if (update)
17879 strcat (pattern, "!, {");
17880 else
17881 strcat (pattern, ", {");
17882 }
17883
17884 /* Output the first destination register. */
17885 strcat (pattern,
17886 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17887
17888 /* Output the rest of the destination registers. */
17889 for (i = offset + 1; i < num_saves; i++)
17890 {
17891 strcat (pattern, ", ");
17892 strcat (pattern,
17893 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17894 }
17895
17896 strcat (pattern, "}");
17897
17898 if (interrupt_p && return_pc)
17899 strcat (pattern, "^");
17900
17901 output_asm_insn (pattern, &cond);
17902 }
17903
17904
17905 /* Output the assembly for a VFP store multiple. */
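/* For instance (illustrative only): storing three D registers starting at
   d8 gives "vpush.64 {d8, d9, d10}" when the base address register is SP,
   and "vstmdb.64 r4!, {d8, d9, d10}" for any other base register.  */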
17906
17907 const char *
17908 vfp_output_vstmd (rtx * operands)
17909 {
17910 char pattern[100];
17911 int p;
17912 int base;
17913 int i;
17914 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17915 ? XEXP (operands[0], 0)
17916 : XEXP (XEXP (operands[0], 0), 0);
17917 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17918
17919 if (push_p)
17920 strcpy (pattern, "vpush%?.64\t{%P1");
17921 else
17922 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17923
17924 p = strlen (pattern);
17925
17926 gcc_assert (REG_P (operands[1]));
17927
17928 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17929 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17930 {
17931 p += sprintf (&pattern[p], ", d%d", base + i);
17932 }
17933 strcpy (&pattern[p], "}");
17934
17935 output_asm_insn (pattern, operands);
17936 return "";
17937 }
17938
17939
17940 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17941 number of bytes pushed. */
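/* Illustrative shape of the generated RTL (the register choice is
   arbitrary): pushing d8-d11 (COUNT = 4) emits a PARALLEL that stores
   32 bytes below SP via a PRE_MODIFY of the stack pointer, and attaches a
   REG_FRAME_RELATED_EXPR note recording the SP adjustment plus one DFmode
   store per register at offsets 0, 8, 16 and 24.  */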
17942
17943 static int
17944 vfp_emit_fstmd (int base_reg, int count)
17945 {
17946 rtx par;
17947 rtx dwarf;
17948 rtx tmp, reg;
17949 int i;
17950
17951 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17952 register pairs are stored by a store multiple insn. We avoid this
17953 by pushing an extra pair. */
17954 if (count == 2 && !arm_arch6)
17955 {
17956 if (base_reg == LAST_VFP_REGNUM - 3)
17957 base_reg -= 2;
17958 count++;
17959 }
17960
17961 /* FSTMD may not store more than 16 doubleword registers at once. Split
17962 larger stores into multiple parts (up to a maximum of two, in
17963 practice). */
17964 if (count > 16)
17965 {
17966 int saved;
17967 /* NOTE: base_reg is an internal register number, so each D register
17968 counts as 2. */
17969 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17970 saved += vfp_emit_fstmd (base_reg, 16);
17971 return saved;
17972 }
17973
17974 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17975 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17976
17977 reg = gen_rtx_REG (DFmode, base_reg);
17978 base_reg += 2;
17979
17980 XVECEXP (par, 0, 0)
17981 = gen_rtx_SET (gen_frame_mem
17982 (BLKmode,
17983 gen_rtx_PRE_MODIFY (Pmode,
17984 stack_pointer_rtx,
17985 plus_constant
17986 (Pmode, stack_pointer_rtx,
17987 - (count * 8)))
17988 ),
17989 gen_rtx_UNSPEC (BLKmode,
17990 gen_rtvec (1, reg),
17991 UNSPEC_PUSH_MULT));
17992
17993 tmp = gen_rtx_SET (stack_pointer_rtx,
17994 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17995 RTX_FRAME_RELATED_P (tmp) = 1;
17996 XVECEXP (dwarf, 0, 0) = tmp;
17997
17998 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17999 RTX_FRAME_RELATED_P (tmp) = 1;
18000 XVECEXP (dwarf, 0, 1) = tmp;
18001
18002 for (i = 1; i < count; i++)
18003 {
18004 reg = gen_rtx_REG (DFmode, base_reg);
18005 base_reg += 2;
18006 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18007
18008 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18009 plus_constant (Pmode,
18010 stack_pointer_rtx,
18011 i * 8)),
18012 reg);
18013 RTX_FRAME_RELATED_P (tmp) = 1;
18014 XVECEXP (dwarf, 0, i + 1) = tmp;
18015 }
18016
18017 par = emit_insn (par);
18018 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18019 RTX_FRAME_RELATED_P (par) = 1;
18020
18021 return count * 8;
18022 }
18023
18024 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18025 has the cmse_nonsecure_call attribute; return false otherwise. */
18026
18027 bool
18028 detect_cmse_nonsecure_call (tree addr)
18029 {
18030 if (!addr)
18031 return FALSE;
18032
18033 tree fntype = TREE_TYPE (addr);
18034 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18035 TYPE_ATTRIBUTES (fntype)))
18036 return TRUE;
18037 return FALSE;
18038 }
18039
18040
18041 /* Emit a call instruction with pattern PAT. ADDR is the address of
18042 the call target. */
18043
18044 void
18045 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18046 {
18047 rtx insn;
18048
18049 insn = emit_call_insn (pat);
18050
18051 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18052 If the call might use such an entry, add a use of the PIC register
18053 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18054 if (TARGET_VXWORKS_RTP
18055 && flag_pic
18056 && !sibcall
18057 && GET_CODE (addr) == SYMBOL_REF
18058 && (SYMBOL_REF_DECL (addr)
18059 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18060 : !SYMBOL_REF_LOCAL_P (addr)))
18061 {
18062 require_pic_register ();
18063 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18064 }
18065
18066 if (TARGET_AAPCS_BASED)
18067 {
18068 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18069 linker. We need to add an IP clobber to allow setting
18070 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18071 is not needed since it's a fixed register. */
18072 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18073 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18074 }
18075 }
18076
18077 /* Output a 'call' insn. */
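/* As an illustration (arbitrary register): on a pre-ARMv5 target a call
   through r2 is emitted as

       mov     lr, pc
       bx      r2          @ "mov pc, r2" without interworking/ARMv4T

   and a call through LR first copies LR into IP, since LR is about to be
   overwritten with the return address.  */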
18078 const char *
18079 output_call (rtx *operands)
18080 {
18081 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18082
18083 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18084 if (REGNO (operands[0]) == LR_REGNUM)
18085 {
18086 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18087 output_asm_insn ("mov%?\t%0, %|lr", operands);
18088 }
18089
18090 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18091
18092 if (TARGET_INTERWORK || arm_arch4t)
18093 output_asm_insn ("bx%?\t%0", operands);
18094 else
18095 output_asm_insn ("mov%?\t%|pc, %0", operands);
18096
18097 return "";
18098 }
18099
18100 /* Output a move from ARM registers to ARM registers of a long double.
18101 OPERANDS[0] is the destination.
18102 OPERANDS[1] is the source. */
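/* For example (illustrative register numbers): moving {r1, r2, r3} into
   {r0, r1, r2} walks upwards (mov r0, r1; mov r1, r2; mov r2, r3), while
   moving {r0, r1, r2} into {r1, r2, r3} walks downwards, so an
   overlapping source register is always read before it is overwritten.  */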
18103 const char *
18104 output_mov_long_double_arm_from_arm (rtx *operands)
18105 {
18106 /* We have to be careful here because the two might overlap. */
18107 int dest_start = REGNO (operands[0]);
18108 int src_start = REGNO (operands[1]);
18109 rtx ops[2];
18110 int i;
18111
18112 if (dest_start < src_start)
18113 {
18114 for (i = 0; i < 3; i++)
18115 {
18116 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18117 ops[1] = gen_rtx_REG (SImode, src_start + i);
18118 output_asm_insn ("mov%?\t%0, %1", ops);
18119 }
18120 }
18121 else
18122 {
18123 for (i = 2; i >= 0; i--)
18124 {
18125 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18126 ops[1] = gen_rtx_REG (SImode, src_start + i);
18127 output_asm_insn ("mov%?\t%0, %1", ops);
18128 }
18129 }
18130
18131 return "";
18132 }
18133
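/* Emit instructions to load the 32-bit value SRC into DEST.  A constant is
   split into a move of the low 16 bits followed, when the high half is
   non-zero, by a zero_extract SET of the high 16 bits (which movt-style
   patterns can match); any other SRC is loaded through a HIGH/LO_SUM pair.
   As an illustrative example (arbitrary register and value),
   SRC = 0x12345678 would typically assemble to "movw rD, #0x5678"
   followed by "movt rD, #0x1234".  */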
18134 void
18135 arm_emit_movpair (rtx dest, rtx src)
18136 {
18137 /* If the src is an immediate, simplify it. */
18138 if (CONST_INT_P (src))
18139 {
18140 HOST_WIDE_INT val = INTVAL (src);
18141 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18142 if ((val >> 16) & 0x0000ffff)
18143 {
18144 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18145 GEN_INT (16)),
18146 GEN_INT ((val >> 16) & 0x0000ffff));
18147 rtx_insn *insn = get_last_insn ();
18148 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18149 }
18150 return;
18151 }
18152 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18153 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18154 rtx_insn *insn = get_last_insn ();
18155 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18156 }
18157
18158 /* Output a move between double words. It must be REG<-MEM
18159 or MEM<-REG. */
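/* Illustrative outputs (arbitrary register numbers): a DImode load from
   [r1] is emitted as "ldrd r4, [r1]" when LDRD is available, or as
   "ldmia r1, {r4-r5}" otherwise; the store direction uses strd/stm in the
   same way.  */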
18160 const char *
18161 output_move_double (rtx *operands, bool emit, int *count)
18162 {
18163 enum rtx_code code0 = GET_CODE (operands[0]);
18164 enum rtx_code code1 = GET_CODE (operands[1]);
18165 rtx otherops[3];
18166 if (count)
18167 *count = 1;
18168
18169 /* The only case when this might happen is when
18170 you are looking at the length of a DImode instruction
18171 that has an invalid constant in it. */
18172 if (code0 == REG && code1 != MEM)
18173 {
18174 gcc_assert (!emit);
18175 *count = 2;
18176 return "";
18177 }
18178
18179 if (code0 == REG)
18180 {
18181 unsigned int reg0 = REGNO (operands[0]);
18182
18183 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18184
18185 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18186
18187 switch (GET_CODE (XEXP (operands[1], 0)))
18188 {
18189 case REG:
18190
18191 if (emit)
18192 {
18193 if (TARGET_LDRD
18194 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18195 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18196 else
18197 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18198 }
18199 break;
18200
18201 case PRE_INC:
18202 gcc_assert (TARGET_LDRD);
18203 if (emit)
18204 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18205 break;
18206
18207 case PRE_DEC:
18208 if (emit)
18209 {
18210 if (TARGET_LDRD)
18211 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18212 else
18213 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18214 }
18215 break;
18216
18217 case POST_INC:
18218 if (emit)
18219 {
18220 if (TARGET_LDRD)
18221 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18222 else
18223 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18224 }
18225 break;
18226
18227 case POST_DEC:
18228 gcc_assert (TARGET_LDRD);
18229 if (emit)
18230 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18231 break;
18232
18233 case PRE_MODIFY:
18234 case POST_MODIFY:
18235 /* Autoincrement addressing modes should never have overlapping
18236 base and destination registers, and overlapping index registers
18237 are already prohibited, so this doesn't need to worry about
18238 fix_cm3_ldrd. */
18239 otherops[0] = operands[0];
18240 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18241 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18242
18243 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18244 {
18245 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18246 {
18247 /* Registers overlap so split out the increment. */
18248 if (emit)
18249 {
18250 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18251 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18252 }
18253 if (count)
18254 *count = 2;
18255 }
18256 else
18257 {
18258 /* Use a single insn if we can.
18259 FIXME: IWMMXT allows offsets larger than ldrd can
18260 handle, fix these up with a pair of ldr. */
18261 if (TARGET_THUMB2
18262 || !CONST_INT_P (otherops[2])
18263 || (INTVAL (otherops[2]) > -256
18264 && INTVAL (otherops[2]) < 256))
18265 {
18266 if (emit)
18267 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18268 }
18269 else
18270 {
18271 if (emit)
18272 {
18273 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18274 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18275 }
18276 if (count)
18277 *count = 2;
18278
18279 }
18280 }
18281 }
18282 else
18283 {
18284 /* Use a single insn if we can.
18285 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18286 fix these up with a pair of ldr. */
18287 if (TARGET_THUMB2
18288 || !CONST_INT_P (otherops[2])
18289 || (INTVAL (otherops[2]) > -256
18290 && INTVAL (otherops[2]) < 256))
18291 {
18292 if (emit)
18293 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18294 }
18295 else
18296 {
18297 if (emit)
18298 {
18299 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18300 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18301 }
18302 if (count)
18303 *count = 2;
18304 }
18305 }
18306 break;
18307
18308 case LABEL_REF:
18309 case CONST:
18310 /* We might be able to use ldrd %0, %1 here. However the range is
18311 different to ldr/adr, and it is broken on some ARMv7-M
18312 implementations. */
18313 /* Use the second register of the pair to avoid problematic
18314 overlap. */
18315 otherops[1] = operands[1];
18316 if (emit)
18317 output_asm_insn ("adr%?\t%0, %1", otherops);
18318 operands[1] = otherops[0];
18319 if (emit)
18320 {
18321 if (TARGET_LDRD)
18322 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18323 else
18324 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18325 }
18326
18327 if (count)
18328 *count = 2;
18329 break;
18330
18331 /* ??? This needs checking for thumb2. */
18332 default:
18333 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18334 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18335 {
18336 otherops[0] = operands[0];
18337 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18338 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18339
18340 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18341 {
18342 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18343 {
18344 switch ((int) INTVAL (otherops[2]))
18345 {
18346 case -8:
18347 if (emit)
18348 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18349 return "";
18350 case -4:
18351 if (TARGET_THUMB2)
18352 break;
18353 if (emit)
18354 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18355 return "";
18356 case 4:
18357 if (TARGET_THUMB2)
18358 break;
18359 if (emit)
18360 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18361 return "";
18362 }
18363 }
18364 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18365 operands[1] = otherops[0];
18366 if (TARGET_LDRD
18367 && (REG_P (otherops[2])
18368 || TARGET_THUMB2
18369 || (CONST_INT_P (otherops[2])
18370 && INTVAL (otherops[2]) > -256
18371 && INTVAL (otherops[2]) < 256)))
18372 {
18373 if (reg_overlap_mentioned_p (operands[0],
18374 otherops[2]))
18375 {
18376 /* Swap base and index registers over to
18377 avoid a conflict. */
18378 std::swap (otherops[1], otherops[2]);
18379 }
18380 /* If both registers conflict, it will usually
18381 have been fixed by a splitter. */
18382 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18383 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18384 {
18385 if (emit)
18386 {
18387 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18388 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18389 }
18390 if (count)
18391 *count = 2;
18392 }
18393 else
18394 {
18395 otherops[0] = operands[0];
18396 if (emit)
18397 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18398 }
18399 return "";
18400 }
18401
18402 if (CONST_INT_P (otherops[2]))
18403 {
18404 if (emit)
18405 {
18406 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18407 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18408 else
18409 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18410 }
18411 }
18412 else
18413 {
18414 if (emit)
18415 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18416 }
18417 }
18418 else
18419 {
18420 if (emit)
18421 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18422 }
18423
18424 if (count)
18425 *count = 2;
18426
18427 if (TARGET_LDRD)
18428 return "ldrd%?\t%0, [%1]";
18429
18430 return "ldmia%?\t%1, %M0";
18431 }
18432 else
18433 {
18434 otherops[1] = adjust_address (operands[1], SImode, 4);
18435 /* Take care of overlapping base/data reg. */
18436 if (reg_mentioned_p (operands[0], operands[1]))
18437 {
18438 if (emit)
18439 {
18440 output_asm_insn ("ldr%?\t%0, %1", otherops);
18441 output_asm_insn ("ldr%?\t%0, %1", operands);
18442 }
18443 if (count)
18444 *count = 2;
18445
18446 }
18447 else
18448 {
18449 if (emit)
18450 {
18451 output_asm_insn ("ldr%?\t%0, %1", operands);
18452 output_asm_insn ("ldr%?\t%0, %1", otherops);
18453 }
18454 if (count)
18455 *count = 2;
18456 }
18457 }
18458 }
18459 }
18460 else
18461 {
18462 /* Constraints should ensure this. */
18463 gcc_assert (code0 == MEM && code1 == REG);
18464 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18465 || (TARGET_ARM && TARGET_LDRD));
18466
18467 /* For TARGET_ARM the first source register of an STRD
18468 must be even. This is usually the case for double-word
18469 values but user assembly constraints can force an odd
18470 starting register. */
18471 bool allow_strd = TARGET_LDRD
18472 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18473 switch (GET_CODE (XEXP (operands[0], 0)))
18474 {
18475 case REG:
18476 if (emit)
18477 {
18478 if (allow_strd)
18479 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18480 else
18481 output_asm_insn ("stm%?\t%m0, %M1", operands);
18482 }
18483 break;
18484
18485 case PRE_INC:
18486 gcc_assert (allow_strd);
18487 if (emit)
18488 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18489 break;
18490
18491 case PRE_DEC:
18492 if (emit)
18493 {
18494 if (allow_strd)
18495 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18496 else
18497 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18498 }
18499 break;
18500
18501 case POST_INC:
18502 if (emit)
18503 {
18504 if (allow_strd)
18505 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18506 else
18507 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18508 }
18509 break;
18510
18511 case POST_DEC:
18512 gcc_assert (allow_strd);
18513 if (emit)
18514 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18515 break;
18516
18517 case PRE_MODIFY:
18518 case POST_MODIFY:
18519 otherops[0] = operands[1];
18520 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18521 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18522
18523 /* IWMMXT allows offsets larger than strd can handle,
18524 fix these up with a pair of str. */
18525 if (!TARGET_THUMB2
18526 && CONST_INT_P (otherops[2])
18527 && (INTVAL(otherops[2]) <= -256
18528 || INTVAL(otherops[2]) >= 256))
18529 {
18530 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18531 {
18532 if (emit)
18533 {
18534 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18535 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18536 }
18537 if (count)
18538 *count = 2;
18539 }
18540 else
18541 {
18542 if (emit)
18543 {
18544 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18545 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18546 }
18547 if (count)
18548 *count = 2;
18549 }
18550 }
18551 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18552 {
18553 if (emit)
18554 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18555 }
18556 else
18557 {
18558 if (emit)
18559 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18560 }
18561 break;
18562
18563 case PLUS:
18564 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18565 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18566 {
18567 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18568 {
18569 case -8:
18570 if (emit)
18571 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18572 return "";
18573
18574 case -4:
18575 if (TARGET_THUMB2)
18576 break;
18577 if (emit)
18578 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18579 return "";
18580
18581 case 4:
18582 if (TARGET_THUMB2)
18583 break;
18584 if (emit)
18585 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18586 return "";
18587 }
18588 }
18589 if (allow_strd
18590 && (REG_P (otherops[2])
18591 || TARGET_THUMB2
18592 || (CONST_INT_P (otherops[2])
18593 && INTVAL (otherops[2]) > -256
18594 && INTVAL (otherops[2]) < 256)))
18595 {
18596 otherops[0] = operands[1];
18597 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18598 if (emit)
18599 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18600 return "";
18601 }
18602 /* Fall through */
18603
18604 default:
18605 otherops[0] = adjust_address (operands[0], SImode, 4);
18606 otherops[1] = operands[1];
18607 if (emit)
18608 {
18609 output_asm_insn ("str%?\t%1, %0", operands);
18610 output_asm_insn ("str%?\t%H1, %0", otherops);
18611 }
18612 if (count)
18613 *count = 2;
18614 }
18615 }
18616
18617 return "";
18618 }
18619
18620 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18621 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
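/* For instance (illustrative), a quad-word load whose address is a plain
   register comes out as "ldmia r2, {r4-r7}", and a register-to-register
   move expands into four mov instructions ordered so that overlapping
   source registers are read before they are written.  */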
18622
18623 const char *
18624 output_move_quad (rtx *operands)
18625 {
18626 if (REG_P (operands[0]))
18627 {
18628 /* Load, or reg->reg move. */
18629
18630 if (MEM_P (operands[1]))
18631 {
18632 switch (GET_CODE (XEXP (operands[1], 0)))
18633 {
18634 case REG:
18635 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18636 break;
18637
18638 case LABEL_REF:
18639 case CONST:
18640 output_asm_insn ("adr%?\t%0, %1", operands);
18641 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18642 break;
18643
18644 default:
18645 gcc_unreachable ();
18646 }
18647 }
18648 else
18649 {
18650 rtx ops[2];
18651 int dest, src, i;
18652
18653 gcc_assert (REG_P (operands[1]));
18654
18655 dest = REGNO (operands[0]);
18656 src = REGNO (operands[1]);
18657
18658 /* This seems pretty dumb, but hopefully GCC won't try to do it
18659 very often. */
18660 if (dest < src)
18661 for (i = 0; i < 4; i++)
18662 {
18663 ops[0] = gen_rtx_REG (SImode, dest + i);
18664 ops[1] = gen_rtx_REG (SImode, src + i);
18665 output_asm_insn ("mov%?\t%0, %1", ops);
18666 }
18667 else
18668 for (i = 3; i >= 0; i--)
18669 {
18670 ops[0] = gen_rtx_REG (SImode, dest + i);
18671 ops[1] = gen_rtx_REG (SImode, src + i);
18672 output_asm_insn ("mov%?\t%0, %1", ops);
18673 }
18674 }
18675 }
18676 else
18677 {
18678 gcc_assert (MEM_P (operands[0]));
18679 gcc_assert (REG_P (operands[1]));
18680 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18681
18682 switch (GET_CODE (XEXP (operands[0], 0)))
18683 {
18684 case REG:
18685 output_asm_insn ("stm%?\t%m0, %M1", operands);
18686 break;
18687
18688 default:
18689 gcc_unreachable ();
18690 }
18691 }
18692
18693 return "";
18694 }
18695
18696 /* Output a VFP load or store instruction. */
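/* Examples of the templates produced here (illustrative operands): a
   DFmode load is "vldr.64 d8, [r3]", an SFmode store is
   "vstr.32 s15, [r3, #4]", and a PRE_DEC store of a D register becomes
   "vstmdb.64 sp!, {d8}".  */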
18697
18698 const char *
18699 output_move_vfp (rtx *operands)
18700 {
18701 rtx reg, mem, addr, ops[2];
18702 int load = REG_P (operands[0]);
18703 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18704 int sp = (!TARGET_VFP_FP16INST
18705 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18706 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18707 const char *templ;
18708 char buff[50];
18709 machine_mode mode;
18710
18711 reg = operands[!load];
18712 mem = operands[load];
18713
18714 mode = GET_MODE (reg);
18715
18716 gcc_assert (REG_P (reg));
18717 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18718 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18719 || mode == SFmode
18720 || mode == DFmode
18721 || mode == HImode
18722 || mode == SImode
18723 || mode == DImode
18724 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18725 gcc_assert (MEM_P (mem));
18726
18727 addr = XEXP (mem, 0);
18728
18729 switch (GET_CODE (addr))
18730 {
18731 case PRE_DEC:
18732 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18733 ops[0] = XEXP (addr, 0);
18734 ops[1] = reg;
18735 break;
18736
18737 case POST_INC:
18738 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18739 ops[0] = XEXP (addr, 0);
18740 ops[1] = reg;
18741 break;
18742
18743 default:
18744 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18745 ops[0] = reg;
18746 ops[1] = mem;
18747 break;
18748 }
18749
18750 sprintf (buff, templ,
18751 load ? "ld" : "st",
18752 dp ? "64" : sp ? "32" : "16",
18753 dp ? "P" : "",
18754 integer_p ? "\t%@ int" : "");
18755 output_asm_insn (buff, ops);
18756
18757 return "";
18758 }
18759
18760 /* Output a Neon double-word or quad-word load or store, or a load
18761 or store for larger structure modes.
18762
18763 WARNING: The ordering of elements is weird in big-endian mode,
18764 because the EABI requires that vectors stored in memory appear
18765 as though they were stored by a VSTM instruction.
18766 GCC RTL defines element ordering based on in-memory order.
18767 This can be different from the architectural ordering of elements
18768 within a NEON register. The intrinsics defined in arm_neon.h use the
18769 NEON register element ordering, not the GCC RTL element ordering.
18770
18771 For example, the in-memory ordering of a big-endian quadword
18772 vector with 16-bit elements when stored from register pair {d0,d1}
18773 will be (lowest address first, d0[N] is NEON register element N):
18774
18775 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18776
18777 When necessary, quadword registers (dN, dN+1) are moved to ARM
18778 registers from rN in the order:
18779
18780 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18781
18782 So that STM/LDM can be used on vectors in ARM registers, and the
18783 same memory layout will result as if VSTM/VLDM were used.
18784
18785 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18786 possible, which allows use of appropriate alignment tags.
18787 Note that the choice of "64" is independent of the actual vector
18788 element size; this size simply ensures that the behavior is
18789 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18790
18791 Due to limitations of those instructions, use of VST1.64/VLD1.64
18792 is not possible if:
18793 - the address contains PRE_DEC, or
18794 - the mode refers to more than 4 double-word registers
18795
18796 In those cases, it would be possible to replace VSTM/VLDM by a
18797 sequence of instructions; this is not currently implemented since
18798 this is not certain to actually improve performance. */
18799
18800 const char *
18801 output_move_neon (rtx *operands)
18802 {
18803 rtx reg, mem, addr, ops[2];
18804 int regno, nregs, load = REG_P (operands[0]);
18805 const char *templ;
18806 char buff[50];
18807 machine_mode mode;
18808
18809 reg = operands[!load];
18810 mem = operands[load];
18811
18812 mode = GET_MODE (reg);
18813
18814 gcc_assert (REG_P (reg));
18815 regno = REGNO (reg);
18816 nregs = REG_NREGS (reg) / 2;
18817 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18818 || NEON_REGNO_OK_FOR_QUAD (regno));
18819 gcc_assert (VALID_NEON_DREG_MODE (mode)
18820 || VALID_NEON_QREG_MODE (mode)
18821 || VALID_NEON_STRUCT_MODE (mode));
18822 gcc_assert (MEM_P (mem));
18823
18824 addr = XEXP (mem, 0);
18825
18826 /* Strip off const from addresses like (const (plus (...))). */
18827 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18828 addr = XEXP (addr, 0);
18829
18830 switch (GET_CODE (addr))
18831 {
18832 case POST_INC:
18833 /* We have to use vldm / vstm for too-large modes. */
18834 if (nregs > 4)
18835 {
18836 templ = "v%smia%%?\t%%0!, %%h1";
18837 ops[0] = XEXP (addr, 0);
18838 }
18839 else
18840 {
18841 templ = "v%s1.64\t%%h1, %%A0";
18842 ops[0] = mem;
18843 }
18844 ops[1] = reg;
18845 break;
18846
18847 case PRE_DEC:
18848 /* We have to use vldm / vstm in this case, since there is no
18849 pre-decrement form of the vld1 / vst1 instructions. */
18850 templ = "v%smdb%%?\t%%0!, %%h1";
18851 ops[0] = XEXP (addr, 0);
18852 ops[1] = reg;
18853 break;
18854
18855 case POST_MODIFY:
18856 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18857 gcc_unreachable ();
18858
18859 case REG:
18860 /* We have to use vldm / vstm for too-large modes. */
18861 if (nregs > 1)
18862 {
18863 if (nregs > 4)
18864 templ = "v%smia%%?\t%%m0, %%h1";
18865 else
18866 templ = "v%s1.64\t%%h1, %%A0";
18867
18868 ops[0] = mem;
18869 ops[1] = reg;
18870 break;
18871 }
18872 /* Fall through. */
18873 case LABEL_REF:
18874 case PLUS:
18875 {
18876 int i;
18877 int overlap = -1;
18878 for (i = 0; i < nregs; i++)
18879 {
18880 /* We're only using DImode here because it's a convenient size. */
18881 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18882 ops[1] = adjust_address (mem, DImode, 8 * i);
18883 if (reg_overlap_mentioned_p (ops[0], mem))
18884 {
18885 gcc_assert (overlap == -1);
18886 overlap = i;
18887 }
18888 else
18889 {
18890 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18891 output_asm_insn (buff, ops);
18892 }
18893 }
18894 if (overlap != -1)
18895 {
18896 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18897 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18898 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18899 output_asm_insn (buff, ops);
18900 }
18901
18902 return "";
18903 }
18904
18905 default:
18906 gcc_unreachable ();
18907 }
18908
18909 sprintf (buff, templ, load ? "ld" : "st");
18910 output_asm_insn (buff, ops);
18911
18912 return "";
18913 }
18914
18915 /* Compute and return the length of neon_mov<mode>, where <mode> is
18916 one of VSTRUCT modes: EI, OI, CI or XI. */
18917 int
18918 arm_attr_length_move_neon (rtx_insn *insn)
18919 {
18920 rtx reg, mem, addr;
18921 int load;
18922 machine_mode mode;
18923
18924 extract_insn_cached (insn);
18925
18926 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18927 {
18928 mode = GET_MODE (recog_data.operand[0]);
18929 switch (mode)
18930 {
18931 case E_EImode:
18932 case E_OImode:
18933 return 8;
18934 case E_CImode:
18935 return 12;
18936 case E_XImode:
18937 return 16;
18938 default:
18939 gcc_unreachable ();
18940 }
18941 }
18942
18943 load = REG_P (recog_data.operand[0]);
18944 reg = recog_data.operand[!load];
18945 mem = recog_data.operand[load];
18946
18947 gcc_assert (MEM_P (mem));
18948
18949 addr = XEXP (mem, 0);
18950
18951 /* Strip off const from addresses like (const (plus (...))). */
18952 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18953 addr = XEXP (addr, 0);
18954
18955 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18956 {
18957 int insns = REG_NREGS (reg) / 2;
18958 return insns * 4;
18959 }
18960 else
18961 return 4;
18962 }
18963
18964 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18965 return zero. */
18966
18967 int
18968 arm_address_offset_is_imm (rtx_insn *insn)
18969 {
18970 rtx mem, addr;
18971
18972 extract_insn_cached (insn);
18973
18974 if (REG_P (recog_data.operand[0]))
18975 return 0;
18976
18977 mem = recog_data.operand[0];
18978
18979 gcc_assert (MEM_P (mem));
18980
18981 addr = XEXP (mem, 0);
18982
18983 if (REG_P (addr)
18984 || (GET_CODE (addr) == PLUS
18985 && REG_P (XEXP (addr, 0))
18986 && CONST_INT_P (XEXP (addr, 1))))
18987 return 1;
18988 else
18989 return 0;
18990 }
18991
18992 /* Output an ADD r, s, #n where n may be too big for one instruction.
18993 If n is zero and the source and destination are the same register, output nothing. */
18994 const char *
18995 output_add_immediate (rtx *operands)
18996 {
18997 HOST_WIDE_INT n = INTVAL (operands[2]);
18998
18999 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19000 {
19001 if (n < 0)
19002 output_multi_immediate (operands,
19003 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19004 -n);
19005 else
19006 output_multi_immediate (operands,
19007 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19008 n);
19009 }
19010
19011 return "";
19012 }
19013
19014 /* Output a multiple immediate operation.
19015 OPERANDS is the vector of operands referred to in the output patterns.
19016 INSTR1 is the output pattern to use for the first constant.
19017 INSTR2 is the output pattern to use for subsequent constants.
19018 IMMED_OP is the index of the constant slot in OPERANDS.
19019 N is the constant value. */
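/* Worked example (illustrative operand strings and registers): with
   INSTR1 = "add %0, %1, %2" and INSTR2 = "add %0, %0, %2", N = 0x10004 is
   split into the chunks 0x4 and 0x10000, giving

       add     r0, r1, #4
       add     r0, r0, #65536

   Each chunk spans at most eight consecutive bits, which is what an ARM
   data-processing immediate can encode.  */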
19020 static const char *
19021 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19022 int immed_op, HOST_WIDE_INT n)
19023 {
19024 #if HOST_BITS_PER_WIDE_INT > 32
19025 n &= 0xffffffff;
19026 #endif
19027
19028 if (n == 0)
19029 {
19030 /* Quick and easy output. */
19031 operands[immed_op] = const0_rtx;
19032 output_asm_insn (instr1, operands);
19033 }
19034 else
19035 {
19036 int i;
19037 const char * instr = instr1;
19038
19039 /* Note that n is never zero here (which would give no output). */
19040 for (i = 0; i < 32; i += 2)
19041 {
19042 if (n & (3 << i))
19043 {
19044 operands[immed_op] = GEN_INT (n & (255 << i));
19045 output_asm_insn (instr, operands);
19046 instr = instr2;
19047 i += 6;
19048 }
19049 }
19050 }
19051
19052 return "";
19053 }
19054
19055 /* Return the name of a shifter operation. */
19056 static const char *
19057 arm_shift_nmem (enum rtx_code code)
19058 {
19059 switch (code)
19060 {
19061 case ASHIFT:
19062 return ARM_LSL_NAME;
19063
19064 case ASHIFTRT:
19065 return "asr";
19066
19067 case LSHIFTRT:
19068 return "lsr";
19069
19070 case ROTATERT:
19071 return "ror";
19072
19073 default:
19074 abort();
19075 }
19076 }
19077
19078 /* Return the appropriate ARM instruction for the operation code.
19079 The returned result should not be overwritten. OP is the rtx of the
19080 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19081 was shifted. */
19082 const char *
19083 arithmetic_instr (rtx op, int shift_first_arg)
19084 {
19085 switch (GET_CODE (op))
19086 {
19087 case PLUS:
19088 return "add";
19089
19090 case MINUS:
19091 return shift_first_arg ? "rsb" : "sub";
19092
19093 case IOR:
19094 return "orr";
19095
19096 case XOR:
19097 return "eor";
19098
19099 case AND:
19100 return "and";
19101
19102 case ASHIFT:
19103 case ASHIFTRT:
19104 case LSHIFTRT:
19105 case ROTATERT:
19106 return arm_shift_nmem(GET_CODE(op));
19107
19108 default:
19109 gcc_unreachable ();
19110 }
19111 }
19112
19113 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19114 for the operation code. The returned result should not be overwritten.
19115 OP is the rtx of the shift.
19116 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
19117 shift amount otherwise. */
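/* A few illustrative mappings: (ashiftrt x (const_int 5)) yields "asr"
   with *AMOUNTP = 5; (mult x (const_int 8)) is treated as a left shift
   and yields "lsl" with *AMOUNTP = 3; (rotate x (const_int 8)) is
   rewritten as a rotate-right and yields "ror" with *AMOUNTP = 24.  */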
19118 static const char *
19119 shift_op (rtx op, HOST_WIDE_INT *amountp)
19120 {
19121 const char * mnem;
19122 enum rtx_code code = GET_CODE (op);
19123
19124 switch (code)
19125 {
19126 case ROTATE:
19127 if (!CONST_INT_P (XEXP (op, 1)))
19128 {
19129 output_operand_lossage ("invalid shift operand");
19130 return NULL;
19131 }
19132
19133 code = ROTATERT;
19134 *amountp = 32 - INTVAL (XEXP (op, 1));
19135 mnem = "ror";
19136 break;
19137
19138 case ASHIFT:
19139 case ASHIFTRT:
19140 case LSHIFTRT:
19141 case ROTATERT:
19142 mnem = arm_shift_nmem(code);
19143 if (CONST_INT_P (XEXP (op, 1)))
19144 {
19145 *amountp = INTVAL (XEXP (op, 1));
19146 }
19147 else if (REG_P (XEXP (op, 1)))
19148 {
19149 *amountp = -1;
19150 return mnem;
19151 }
19152 else
19153 {
19154 output_operand_lossage ("invalid shift operand");
19155 return NULL;
19156 }
19157 break;
19158
19159 case MULT:
19160 /* We never have to worry about the amount being other than a
19161 power of 2, since this case can never be reloaded from a reg. */
19162 if (!CONST_INT_P (XEXP (op, 1)))
19163 {
19164 output_operand_lossage ("invalid shift operand");
19165 return NULL;
19166 }
19167
19168 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19169
19170 /* Amount must be a power of two. */
19171 if (*amountp & (*amountp - 1))
19172 {
19173 output_operand_lossage ("invalid shift operand");
19174 return NULL;
19175 }
19176
19177 *amountp = exact_log2 (*amountp);
19178 gcc_assert (IN_RANGE (*amountp, 0, 31));
19179 return ARM_LSL_NAME;
19180
19181 default:
19182 output_operand_lossage ("invalid shift operand");
19183 return NULL;
19184 }
19185
19186 /* This is not 100% correct, but follows from the desire to merge
19187 multiplication by a power of 2 with the recognizer for a
19188 shift. >=32 is not a valid shift for "lsl", so we must try to
19189 output a shift that produces the correct arithmetic result.
19190 Using lsr #32 is identical except for the fact that the carry bit
19191 is not set correctly if we set the flags; but we never use the
19192 carry bit from such an operation, so we can ignore that. */
19193 if (code == ROTATERT)
19194 /* Rotate is just modulo 32. */
19195 *amountp &= 31;
19196 else if (*amountp != (*amountp & 31))
19197 {
19198 if (code == ASHIFT)
19199 mnem = "lsr";
19200 *amountp = 32;
19201 }
19202
19203 /* Shifts of 0 are no-ops. */
19204 if (*amountp == 0)
19205 return NULL;
19206
19207 return mnem;
19208 }
19209
19210 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19211 because /bin/as is horribly restrictive. The judgement about
19212 whether or not each character is 'printable' (and can be output as
19213 is) or not (and must be printed with an octal escape) must be made
19214 with reference to the *host* character set -- the situation is
19215 similar to that discussed in the comments above pp_c_char in
19216 c-pretty-print.c. */
19217
19218 #define MAX_ASCII_LEN 51
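/* For example (illustrative), the three bytes 'h', 'i', '\n' come out as

	.ascii	"hi\012"

   Printable characters are passed through (with '\\' and '"' escaped) and
   anything else is written as a three-digit octal escape; once
   MAX_ASCII_LEN characters have been emitted, the string is restarted on
   a fresh .ascii directive.  */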
19219
19220 void
19221 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19222 {
19223 int i;
19224 int len_so_far = 0;
19225
19226 fputs ("\t.ascii\t\"", stream);
19227
19228 for (i = 0; i < len; i++)
19229 {
19230 int c = p[i];
19231
19232 if (len_so_far >= MAX_ASCII_LEN)
19233 {
19234 fputs ("\"\n\t.ascii\t\"", stream);
19235 len_so_far = 0;
19236 }
19237
19238 if (ISPRINT (c))
19239 {
19240 if (c == '\\' || c == '\"')
19241 {
19242 putc ('\\', stream);
19243 len_so_far++;
19244 }
19245 putc (c, stream);
19246 len_so_far++;
19247 }
19248 else
19249 {
19250 fprintf (stream, "\\%03o", c);
19251 len_so_far += 4;
19252 }
19253 }
19254
19255 fputs ("\"\n", stream);
19256 }
19257 \f
19258 /* Whether a register is callee saved or not. This is necessary because high
19259 registers, although callee saved, are marked as caller saved when optimizing
19260 for size on Thumb-1 targets so that GCC avoids using them. */
19261 #define callee_saved_reg_p(reg) \
19262 (!call_used_regs[reg] \
19263 || (TARGET_THUMB1 && optimize_size \
19264 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19265
19266 /* Compute the register save mask for registers 0 through 12
19267 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19268
19269 static unsigned long
19270 arm_compute_save_reg0_reg12_mask (void)
19271 {
19272 unsigned long func_type = arm_current_func_type ();
19273 unsigned long save_reg_mask = 0;
19274 unsigned int reg;
19275
19276 if (IS_INTERRUPT (func_type))
19277 {
19278 unsigned int max_reg;
19279 /* Interrupt functions must not corrupt any registers,
19280 even call clobbered ones. If this is a leaf function
19281 we can just examine the registers used by the RTL, but
19282 otherwise we have to assume that whatever function is
19283 called might clobber anything, and so we have to save
19284 all the call-clobbered registers as well. */
19285 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19286 /* FIQ handlers have registers r8 - r12 banked, so
19287 we only need to check r0 - r7. Normal ISRs only
19288 bank r14 and r15, so for those we must check up to r12.
19289 r13 is the stack pointer which is always preserved,
19290 so we do not need to consider it here. */
19291 max_reg = 7;
19292 else
19293 max_reg = 12;
19294
19295 for (reg = 0; reg <= max_reg; reg++)
19296 if (df_regs_ever_live_p (reg)
19297 || (! crtl->is_leaf && call_used_regs[reg]))
19298 save_reg_mask |= (1 << reg);
19299
19300 /* Also save the pic base register if necessary. */
19301 if (flag_pic
19302 && !TARGET_SINGLE_PIC_BASE
19303 && arm_pic_register != INVALID_REGNUM
19304 && crtl->uses_pic_offset_table)
19305 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19306 }
19307 else if (IS_VOLATILE(func_type))
19308 {
19309 /* For noreturn functions we historically omitted register saves
19310 altogether. However this really messes up debugging. As a
19311 compromise save just the frame pointers. Combined with the link
19312 register saved elsewhere this should be sufficient to get
19313 a backtrace. */
19314 if (frame_pointer_needed)
19315 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19316 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19317 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19318 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19319 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19320 }
19321 else
19322 {
19323 /* In the normal case we only need to save those registers
19324 which are call saved and which are used by this function. */
19325 for (reg = 0; reg <= 11; reg++)
19326 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19327 save_reg_mask |= (1 << reg);
19328
19329 /* Handle the frame pointer as a special case. */
19330 if (frame_pointer_needed)
19331 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19332
19333 /* If we aren't loading the PIC register,
19334 don't stack it even though it may be live. */
19335 if (flag_pic
19336 && !TARGET_SINGLE_PIC_BASE
19337 && arm_pic_register != INVALID_REGNUM
19338 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19339 || crtl->uses_pic_offset_table))
19340 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19341
19342 /* The prologue will copy SP into R0, so save it. */
19343 if (IS_STACKALIGN (func_type))
19344 save_reg_mask |= 1;
19345 }
19346
19347 /* Save registers so the exception handler can modify them. */
19348 if (crtl->calls_eh_return)
19349 {
19350 unsigned int i;
19351
19352 for (i = 0; ; i++)
19353 {
19354 reg = EH_RETURN_DATA_REGNO (i);
19355 if (reg == INVALID_REGNUM)
19356 break;
19357 save_reg_mask |= 1 << reg;
19358 }
19359 }
19360
19361 return save_reg_mask;
19362 }
19363
19364 /* Return true if r3 is live at the start of the function. */
19365
19366 static bool
19367 arm_r3_live_at_start_p (void)
19368 {
19369 /* Just look at cfg info, which is still close enough to correct at this
19370 point. This gives false positives for broken functions that might use
19371 uninitialized data that happens to be allocated in r3, but who cares? */
19372 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19373 }
19374
19375 /* Compute the number of bytes used to store the static chain register on the
19376 stack, above the stack frame. We need to know this accurately to get the
19377 alignment of the rest of the stack frame correct. */
19378
19379 static int
19380 arm_compute_static_chain_stack_bytes (void)
19381 {
19382 /* Once the value is updated from the init value of -1, do not
19383 re-compute. */
19384 if (cfun->machine->static_chain_stack_bytes != -1)
19385 return cfun->machine->static_chain_stack_bytes;
19386
19387 /* See the defining assertion in arm_expand_prologue. */
19388 if (IS_NESTED (arm_current_func_type ())
19389 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19390 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19391 || flag_stack_clash_protection)
19392 && !df_regs_ever_live_p (LR_REGNUM)))
19393 && arm_r3_live_at_start_p ()
19394 && crtl->args.pretend_args_size == 0)
19395 return 4;
19396
19397 return 0;
19398 }
19399
19400 /* Compute a bit mask of which core registers need to be
19401 saved on the stack for the current function.
19402 This is used by arm_compute_frame_layout, which may add extra registers. */
19403
19404 static unsigned long
19405 arm_compute_save_core_reg_mask (void)
19406 {
19407 unsigned int save_reg_mask = 0;
19408 unsigned long func_type = arm_current_func_type ();
19409 unsigned int reg;
19410
19411 if (IS_NAKED (func_type))
19412 /* This should never really happen. */
19413 return 0;
19414
19415 /* If we are creating a stack frame, then we must save the frame pointer,
19416 IP (which will hold the old stack pointer), LR and the PC. */
19417 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19418 save_reg_mask |=
19419 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19420 | (1 << IP_REGNUM)
19421 | (1 << LR_REGNUM)
19422 | (1 << PC_REGNUM);
19423
19424 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19425
19426 /* Decide if we need to save the link register.
19427 Interrupt routines have their own banked link register,
19428 so they never need to save it.
19429 Otherwise if we do not use the link register we do not need to save
19430 it. If we are pushing other registers onto the stack however, we
19431 can save an instruction in the epilogue by pushing the link register
19432 now and then popping it back into the PC. This incurs extra memory
19433 accesses though, so we only do it when optimizing for size, and only
19434 if we know that we will not need a fancy return sequence. */
19435 if (df_regs_ever_live_p (LR_REGNUM)
19436 || (save_reg_mask
19437 && optimize_size
19438 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19439 && !crtl->tail_call_emit
19440 && !crtl->calls_eh_return))
19441 save_reg_mask |= 1 << LR_REGNUM;
19442
19443 if (cfun->machine->lr_save_eliminated)
19444 save_reg_mask &= ~ (1 << LR_REGNUM);
19445
19446 if (TARGET_REALLY_IWMMXT
19447 && ((bit_count (save_reg_mask)
19448 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19449 arm_compute_static_chain_stack_bytes())
19450 ) % 2) != 0)
19451 {
19452 /* The total number of registers that are going to be pushed
19453 onto the stack is odd. We need to ensure that the stack
19454 is 64-bit aligned before we start to save iWMMXt registers,
19455 and also before we start to create locals. (A local variable
19456 might be a double or long long which we will load/store using
19457 an iWMMXt instruction). Therefore we need to push another
19458 ARM register, so that the stack will be 64-bit aligned. We
19459 try to avoid using the arg registers (r0 -r3) as they might be
19460 used to pass values in a tail call. */
19461 for (reg = 4; reg <= 12; reg++)
19462 if ((save_reg_mask & (1 << reg)) == 0)
19463 break;
19464
19465 if (reg <= 12)
19466 save_reg_mask |= (1 << reg);
19467 else
19468 {
19469 cfun->machine->sibcall_blocked = 1;
19470 save_reg_mask |= (1 << 3);
19471 }
19472 }
19473
19474 /* We may need to push an additional register for use initializing the
19475 PIC base register. */
19476 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19477 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19478 {
19479 reg = thumb_find_work_register (1 << 4);
19480 if (!call_used_regs[reg])
19481 save_reg_mask |= (1 << reg);
19482 }
19483
19484 return save_reg_mask;
19485 }
19486
19487 /* Compute a bit mask of which core registers need to be
19488 saved on the stack for the current function. */
19489 static unsigned long
19490 thumb1_compute_save_core_reg_mask (void)
19491 {
19492 unsigned long mask;
19493 unsigned reg;
19494
19495 mask = 0;
19496 for (reg = 0; reg < 12; reg ++)
19497 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19498 mask |= 1 << reg;
19499
19500 /* Handle the frame pointer as a special case. */
19501 if (frame_pointer_needed)
19502 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19503
19504 if (flag_pic
19505 && !TARGET_SINGLE_PIC_BASE
19506 && arm_pic_register != INVALID_REGNUM
19507 && crtl->uses_pic_offset_table)
19508 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19509
19510 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19511 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19512 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19513
19514 /* LR will also be pushed if any lo regs are pushed. */
19515 if (mask & 0xff || thumb_force_lr_save ())
19516 mask |= (1 << LR_REGNUM);
19517
19518 /* Make sure we have a low work register if we need one.
19519 We will need one if we are going to push a high register,
19520 but we are not currently intending to push a low register. */
19521 if ((mask & 0xff) == 0
19522 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19523 {
19524 /* Use thumb_find_work_register to choose which register
19525 we will use. If the register is live then we will
19526 have to push it. Use LAST_LO_REGNUM as our fallback
19527 choice for the register to select. */
19528 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19529 /* Make sure the register returned by thumb_find_work_register is
19530 not part of the return value. */
19531 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19532 reg = LAST_LO_REGNUM;
19533
19534 if (callee_saved_reg_p (reg))
19535 mask |= 1 << reg;
19536 }
19537
19538 /* The 504 below is 8 bytes less than 512 because there are two possible
19539 alignment words. We can't tell here if they will be present or not so we
19540 have to play it safe and assume that they are. */
19541 if ((CALLER_INTERWORKING_SLOT_SIZE +
19542 ROUND_UP_WORD (get_frame_size ()) +
19543 crtl->outgoing_args_size) >= 504)
19544 {
19545 /* This is the same as the code in thumb1_expand_prologue() which
19546 determines which register to use for stack decrement. */
19547 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19548 if (mask & (1 << reg))
19549 break;
19550
19551 if (reg > LAST_LO_REGNUM)
19552 {
19553 /* Make sure we have a register available for stack decrement. */
19554 mask |= 1 << LAST_LO_REGNUM;
19555 }
19556 }
19557
19558 return mask;
19559 }
19560
19561
19562 /* Return the number of bytes required to save VFP registers. */
19563 static int
19564 arm_get_vfp_saved_size (void)
19565 {
19566 unsigned int regno;
19567 int count;
19568 int saved;
19569
19570 saved = 0;
19571 /* Space for saved VFP registers. */
19572 if (TARGET_HARD_FLOAT)
19573 {
19574 count = 0;
19575 for (regno = FIRST_VFP_REGNUM;
19576 regno < LAST_VFP_REGNUM;
19577 regno += 2)
19578 {
19579 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19580 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19581 {
19582 if (count > 0)
19583 {
19584 /* Work around the ARM10 VFPr1 bug. */
19585 if (count == 2 && !arm_arch6)
19586 count++;
19587 saved += count * 8;
19588 }
19589 count = 0;
19590 }
19591 else
19592 count++;
19593 }
19594 if (count > 0)
19595 {
19596 if (count == 2 && !arm_arch6)
19597 count++;
19598 saved += count * 8;
19599 }
19600 }
19601 return saved;
19602 }
19603
19604
19605 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19606 everything bar the final return instruction. If SIMPLE_RETURN is true,
19607 then do not output the epilogue, because it has already been emitted in RTL.
19608
19609 Note: do not forget to update the length attribute of the corresponding insn
19610 pattern when changing assembly output (e.g. the length attribute of
19611 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19612 register clearing sequences). */
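/* Typical outputs (illustrative): a normal return that restores saved
   registers is "pop {r4, r5, pc}"; a leaf return with nothing to restore
   is "bx lr" (or "mov pc, lr" before ARMv4T); an IRQ handler returns with
   "subs pc, lr, #4" so that the SPSR is copied back into the CPSR.  */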
19613 const char *
19614 output_return_instruction (rtx operand, bool really_return, bool reverse,
19615 bool simple_return)
19616 {
19617 char conditional[10];
19618 char instr[100];
19619 unsigned reg;
19620 unsigned long live_regs_mask;
19621 unsigned long func_type;
19622 arm_stack_offsets *offsets;
19623
19624 func_type = arm_current_func_type ();
19625
19626 if (IS_NAKED (func_type))
19627 return "";
19628
19629 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19630 {
19631 /* If this function was declared non-returning, and we have
19632 found a tail call, then we have to trust that the called
19633 function won't return. */
19634 if (really_return)
19635 {
19636 rtx ops[2];
19637
19638 /* Otherwise, trap an attempted return by aborting. */
19639 ops[0] = operand;
19640 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19641 : "abort");
19642 assemble_external_libcall (ops[1]);
19643 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19644 }
19645
19646 return "";
19647 }
19648
19649 gcc_assert (!cfun->calls_alloca || really_return);
19650
19651 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19652
19653 cfun->machine->return_used_this_function = 1;
19654
19655 offsets = arm_get_frame_offsets ();
19656 live_regs_mask = offsets->saved_regs_mask;
19657
19658 if (!simple_return && live_regs_mask)
19659 {
19660 const char * return_reg;
19661
19662 /* If we do not have any special requirements for function exit
19663 (e.g. interworking) then we can load the return address
19664 directly into the PC. Otherwise we must load it into LR. */
19665 if (really_return
19666 && !IS_CMSE_ENTRY (func_type)
19667 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19668 return_reg = reg_names[PC_REGNUM];
19669 else
19670 return_reg = reg_names[LR_REGNUM];
19671
19672 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19673 {
19674 /* There are three possible reasons for the IP register
19675 being saved: 1) a stack frame was created, in which case
19676 IP contains the old stack pointer, or 2) an ISR routine
19677 corrupted it, or 3) it was saved to align the stack on
19678 iWMMXt. In case 1, restore IP into SP, otherwise just
19679 restore IP. */
19680 if (frame_pointer_needed)
19681 {
19682 live_regs_mask &= ~ (1 << IP_REGNUM);
19683 live_regs_mask |= (1 << SP_REGNUM);
19684 }
19685 else
19686 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19687 }
19688
19689 /* On some ARM architectures it is faster to use LDR rather than
19690 LDM to load a single register. On other architectures, the
19691 cost is the same. In 26 bit mode, or for exception handlers,
19692 we have to use LDM to load the PC so that the CPSR is also
19693 restored. */
19694 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19695 if (live_regs_mask == (1U << reg))
19696 break;
19697
19698 if (reg <= LAST_ARM_REGNUM
19699 && (reg != LR_REGNUM
19700 || ! really_return
19701 || ! IS_INTERRUPT (func_type)))
19702 {
19703 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19704 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19705 }
19706 else
19707 {
19708 char *p;
19709 int first = 1;
19710
19711 /* Generate the load multiple instruction to restore the
19712 registers. Note we can get here, even if
19713 frame_pointer_needed is true, but only if sp already
19714 points to the base of the saved core registers. */
19715 if (live_regs_mask & (1 << SP_REGNUM))
19716 {
19717 unsigned HOST_WIDE_INT stack_adjust;
19718
19719 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19720 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19721
19722 if (stack_adjust && arm_arch5t && TARGET_ARM)
19723 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19724 else
19725 {
19726 /* If we can't use ldmib (SA110 bug),
19727 then try to pop r3 instead. */
19728 if (stack_adjust)
19729 live_regs_mask |= 1 << 3;
19730
19731 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19732 }
19733 }
19734 /* For interrupt returns we have to use an LDM rather than
19735 a POP so that we can use the exception return variant. */
19736 else if (IS_INTERRUPT (func_type))
19737 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19738 else
19739 sprintf (instr, "pop%s\t{", conditional);
19740
19741 p = instr + strlen (instr);
19742
19743 for (reg = 0; reg <= SP_REGNUM; reg++)
19744 if (live_regs_mask & (1 << reg))
19745 {
19746 int l = strlen (reg_names[reg]);
19747
19748 if (first)
19749 first = 0;
19750 else
19751 {
19752 memcpy (p, ", ", 2);
19753 p += 2;
19754 }
19755
19756 memcpy (p, "%|", 2);
19757 memcpy (p + 2, reg_names[reg], l);
19758 p += l + 2;
19759 }
19760
19761 if (live_regs_mask & (1 << LR_REGNUM))
19762 {
19763 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19764 /* If returning from an interrupt, restore the CPSR. */
19765 if (IS_INTERRUPT (func_type))
19766 strcat (p, "^");
19767 }
19768 else
19769 strcpy (p, "}");
19770 }
19771
19772 output_asm_insn (instr, & operand);
19773
19774 /* See if we need to generate an extra instruction to
19775 perform the actual function return. */
19776 if (really_return
19777 && func_type != ARM_FT_INTERWORKED
19778 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19779 {
19780 /* The return has already been handled
19781 by loading the LR into the PC. */
19782 return "";
19783 }
19784 }
19785
19786 if (really_return)
19787 {
19788 switch ((int) ARM_FUNC_TYPE (func_type))
19789 {
19790 case ARM_FT_ISR:
19791 case ARM_FT_FIQ:
19792 /* ??? This is wrong for unified assembly syntax. */
19793 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19794 break;
19795
19796 case ARM_FT_INTERWORKED:
19797 gcc_assert (arm_arch5t || arm_arch4t);
19798 sprintf (instr, "bx%s\t%%|lr", conditional);
19799 break;
19800
19801 case ARM_FT_EXCEPTION:
19802 /* ??? This is wrong for unified assembly syntax. */
19803 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19804 break;
19805
19806 default:
19807 if (IS_CMSE_ENTRY (func_type))
19808 {
19809 /* Check if we have to clear the 'GE bits', which are only used if
19810 parallel add and subtract instructions are available. */
19811 if (TARGET_INT_SIMD)
19812 snprintf (instr, sizeof (instr),
19813 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19814 else
19815 snprintf (instr, sizeof (instr),
19816 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19817
19818 output_asm_insn (instr, & operand);
19819 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19820 {
19821 /* Clear the cumulative exception-status bits (0-4,7) and the
19822 condition code bits (28-31) of the FPSCR. We need to
19823 remember to clear the first scratch register used (IP) and
19824 save and restore the second (r4). */
19825 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19826 output_asm_insn (instr, & operand);
19827 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19828 output_asm_insn (instr, & operand);
19829 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19830 output_asm_insn (instr, & operand);
19831 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19832 output_asm_insn (instr, & operand);
19833 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19834 output_asm_insn (instr, & operand);
19835 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19836 output_asm_insn (instr, & operand);
19837 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19838 output_asm_insn (instr, & operand);
19839 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19840 output_asm_insn (instr, & operand);
19841 }
19842 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19843 }
19844 /* Use bx if it's available. */
19845 else if (arm_arch5t || arm_arch4t)
19846 sprintf (instr, "bx%s\t%%|lr", conditional);
19847 else
19848 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19849 break;
19850 }
19851
19852 output_asm_insn (instr, & operand);
19853 }
19854
19855 return "";
19856 }
19857
19858 /* Output in FILE asm statements needed to declare the NAME of the function
19859 defined by its DECL node. */
19860
19861 void
19862 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19863 {
19864 size_t cmse_name_len;
19865 char *cmse_name = 0;
19866 char cmse_prefix[] = "__acle_se_";
19867
19868 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19869 extra function label for each function with the 'cmse_nonsecure_entry'
19870 attribute. This extra function label should be prepended with
19871 '__acle_se_', telling the linker that it needs to create secure gateway
19872 veneers for this function. */
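/* As an illustrative sketch (for a hypothetical function "foo" with the
   cmse_nonsecure_entry attribute; not taken from actual compiler output),
   the directives emitted below would look roughly like:

       .globl  __acle_se_foo
       .type   __acle_se_foo, %function
       .type   foo, %function
   foo:
   __acle_se_foo:  */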
19873 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19874 DECL_ATTRIBUTES (decl)))
19875 {
19876 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19877 cmse_name = XALLOCAVEC (char, cmse_name_len);
19878 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19879 targetm.asm_out.globalize_label (file, cmse_name);
19880
19881 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19882 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19883 }
19884
19885 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19886 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19887 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19888 ASM_OUTPUT_LABEL (file, name);
19889
19890 if (cmse_name)
19891 ASM_OUTPUT_LABEL (file, cmse_name);
19892
19893 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19894 }
19895
19896 /* Write the function name into the code section, directly preceding
19897 the function prologue.
19898
19899 Code will be output similar to this:
19900 t0
19901 .ascii "arm_poke_function_name", 0
19902 .align
19903 t1
19904 .word 0xff000000 + (t1 - t0)
19905 arm_poke_function_name
19906 mov ip, sp
19907 stmfd sp!, {fp, ip, lr, pc}
19908 sub fp, ip, #4
19909
19910 When performing a stack backtrace, code can inspect the value
19911 of 'pc' stored at 'fp' + 0. If the trace function then looks
19912 at location pc - 12 and the top 8 bits are set, then we know
19913 that there is a function name embedded immediately preceding this
19914 location, whose length is ((pc[-3]) & ~0xff000000).
19915
19916 We assume that pc is declared as a pointer to an unsigned long.
19917
19918 It is of no benefit to output the function name if we are assembling
19919 a leaf function. These function types will not contain a stack
19920 backtrace structure, therefore it is not possible to determine the
19921 function name. */
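/* As a worked example of the marker word (a sketch for a hypothetical
   name "foo"): length = strlen ("foo") + 1 = 4 and alignlength =
   ROUND_UP_WORD (4) = 4, so the word emitted is 0xff000000 + 4
   = 0xff000004, and a backtracer recovers the name length as
   0xff000004 & ~0xff000000 = 4.  */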
19922 void
19923 arm_poke_function_name (FILE *stream, const char *name)
19924 {
19925 unsigned long alignlength;
19926 unsigned long length;
19927 rtx x;
19928
19929 length = strlen (name) + 1;
19930 alignlength = ROUND_UP_WORD (length);
19931
19932 ASM_OUTPUT_ASCII (stream, name, length);
19933 ASM_OUTPUT_ALIGN (stream, 2);
19934 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19935 assemble_aligned_integer (UNITS_PER_WORD, x);
19936 }
19937
19938 /* Place some comments into the assembler stream
19939 describing the current function. */
19940 static void
19941 arm_output_function_prologue (FILE *f)
19942 {
19943 unsigned long func_type;
19944
19945 /* Sanity check. */
19946 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19947
19948 func_type = arm_current_func_type ();
19949
19950 switch ((int) ARM_FUNC_TYPE (func_type))
19951 {
19952 default:
19953 case ARM_FT_NORMAL:
19954 break;
19955 case ARM_FT_INTERWORKED:
19956 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19957 break;
19958 case ARM_FT_ISR:
19959 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19960 break;
19961 case ARM_FT_FIQ:
19962 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19963 break;
19964 case ARM_FT_EXCEPTION:
19965 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19966 break;
19967 }
19968
19969 if (IS_NAKED (func_type))
19970 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19971
19972 if (IS_VOLATILE (func_type))
19973 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19974
19975 if (IS_NESTED (func_type))
19976 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19977 if (IS_STACKALIGN (func_type))
19978 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19979 if (IS_CMSE_ENTRY (func_type))
19980 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19981
19982 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19983 (HOST_WIDE_INT) crtl->args.size,
19984 crtl->args.pretend_args_size,
19985 (HOST_WIDE_INT) get_frame_size ());
19986
19987 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19988 frame_pointer_needed,
19989 cfun->machine->uses_anonymous_args);
19990
19991 if (cfun->machine->lr_save_eliminated)
19992 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19993
19994 if (crtl->calls_eh_return)
19995 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19996
19997 }
19998
19999 static void
20000 arm_output_function_epilogue (FILE *)
20001 {
20002 arm_stack_offsets *offsets;
20003
20004 if (TARGET_THUMB1)
20005 {
20006 int regno;
20007
20008 /* Emit any call-via-reg trampolines that are needed for v4t support
20009 of call_reg and call_value_reg type insns. */
20010 for (regno = 0; regno < LR_REGNUM; regno++)
20011 {
20012 rtx label = cfun->machine->call_via[regno];
20013
20014 if (label != NULL)
20015 {
20016 switch_to_section (function_section (current_function_decl));
20017 targetm.asm_out.internal_label (asm_out_file, "L",
20018 CODE_LABEL_NUMBER (label));
20019 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20020 }
20021 }
20022
20023 /* ??? Probably not safe to set this here, since it assumes that a
20024 function will be emitted as assembly immediately after we generate
20025 RTL for it. This does not happen for inline functions. */
20026 cfun->machine->return_used_this_function = 0;
20027 }
20028 else /* TARGET_32BIT */
20029 {
20030 /* We need to take into account any stack-frame rounding. */
20031 offsets = arm_get_frame_offsets ();
20032
20033 gcc_assert (!use_return_insn (FALSE, NULL)
20034 || (cfun->machine->return_used_this_function != 0)
20035 || offsets->saved_regs == offsets->outgoing_args
20036 || frame_pointer_needed);
20037 }
20038 }
20039
20040 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20041 STR and STRD. If an even number of registers is being pushed, one
20042 STRD pattern is created for each register pair. If an
20043 odd number of registers is pushed, emit an initial STR followed by
20044 as many STRD instructions as are needed. This works best when the
20045 stack is initially 64-bit aligned (the normal case), since it
20046 ensures that each STRD is also 64-bit aligned. */
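/* As an illustrative sketch (assuming a mask of {r4, r5, r6}, i.e. an odd
   number of registers; not taken from actual compiler output), the RTL
   emitted here corresponds to a sequence like:

       str     r4, [sp, #-12]!         @ single STR allocates all 12 bytes
       strd    r5, r6, [sp, #4]        @ remaining pair stays 64-bit aligned  */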
20047 static void
20048 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20049 {
20050 int num_regs = 0;
20051 int i;
20052 int regno;
20053 rtx par = NULL_RTX;
20054 rtx dwarf = NULL_RTX;
20055 rtx tmp;
20056 bool first = true;
20057
20058 num_regs = bit_count (saved_regs_mask);
20059
20060 /* Must be at least one register to save, and can't save SP or PC. */
20061 gcc_assert (num_regs > 0 && num_regs <= 14);
20062 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20063 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20064
20065 /* Create sequence for DWARF info. All the frame-related data for
20066 debugging is held in this wrapper. */
20067 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20068
20069 /* Describe the stack adjustment. */
20070 tmp = gen_rtx_SET (stack_pointer_rtx,
20071 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20072 RTX_FRAME_RELATED_P (tmp) = 1;
20073 XVECEXP (dwarf, 0, 0) = tmp;
20074
20075 /* Find the first register. */
20076 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20077 ;
20078
20079 i = 0;
20080
20081 /* If there's an odd number of registers to push, start off by
20082 pushing a single register. This ensures that subsequent strd
20083 operations are dword aligned (assuming that SP was originally
20084 64-bit aligned). */
20085 if ((num_regs & 1) != 0)
20086 {
20087 rtx reg, mem, insn;
20088
20089 reg = gen_rtx_REG (SImode, regno);
20090 if (num_regs == 1)
20091 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20092 stack_pointer_rtx));
20093 else
20094 mem = gen_frame_mem (Pmode,
20095 gen_rtx_PRE_MODIFY
20096 (Pmode, stack_pointer_rtx,
20097 plus_constant (Pmode, stack_pointer_rtx,
20098 -4 * num_regs)));
20099
20100 tmp = gen_rtx_SET (mem, reg);
20101 RTX_FRAME_RELATED_P (tmp) = 1;
20102 insn = emit_insn (tmp);
20103 RTX_FRAME_RELATED_P (insn) = 1;
20104 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20105 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 i++;
20108 regno++;
20109 XVECEXP (dwarf, 0, i) = tmp;
20110 first = false;
20111 }
20112
20113 while (i < num_regs)
20114 if (saved_regs_mask & (1 << regno))
20115 {
20116 rtx reg1, reg2, mem1, mem2;
20117 rtx tmp0, tmp1, tmp2;
20118 int regno2;
20119
20120 /* Find the register to pair with this one. */
20121 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20122 regno2++)
20123 ;
20124
20125 reg1 = gen_rtx_REG (SImode, regno);
20126 reg2 = gen_rtx_REG (SImode, regno2);
20127
20128 if (first)
20129 {
20130 rtx insn;
20131
20132 first = false;
20133 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20134 stack_pointer_rtx,
20135 -4 * num_regs));
20136 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20137 stack_pointer_rtx,
20138 -4 * (num_regs - 1)));
20139 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20140 plus_constant (Pmode, stack_pointer_rtx,
20141 -4 * (num_regs)));
20142 tmp1 = gen_rtx_SET (mem1, reg1);
20143 tmp2 = gen_rtx_SET (mem2, reg2);
20144 RTX_FRAME_RELATED_P (tmp0) = 1;
20145 RTX_FRAME_RELATED_P (tmp1) = 1;
20146 RTX_FRAME_RELATED_P (tmp2) = 1;
20147 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20148 XVECEXP (par, 0, 0) = tmp0;
20149 XVECEXP (par, 0, 1) = tmp1;
20150 XVECEXP (par, 0, 2) = tmp2;
20151 insn = emit_insn (par);
20152 RTX_FRAME_RELATED_P (insn) = 1;
20153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20154 }
20155 else
20156 {
20157 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20158 stack_pointer_rtx,
20159 4 * i));
20160 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20161 stack_pointer_rtx,
20162 4 * (i + 1)));
20163 tmp1 = gen_rtx_SET (mem1, reg1);
20164 tmp2 = gen_rtx_SET (mem2, reg2);
20165 RTX_FRAME_RELATED_P (tmp1) = 1;
20166 RTX_FRAME_RELATED_P (tmp2) = 1;
20167 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20168 XVECEXP (par, 0, 0) = tmp1;
20169 XVECEXP (par, 0, 1) = tmp2;
20170 emit_insn (par);
20171 }
20172
20173 /* Create unwind information. This is an approximation. */
20174 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20175 plus_constant (Pmode,
20176 stack_pointer_rtx,
20177 4 * i)),
20178 reg1);
20179 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20180 plus_constant (Pmode,
20181 stack_pointer_rtx,
20182 4 * (i + 1))),
20183 reg2);
20184
20185 RTX_FRAME_RELATED_P (tmp1) = 1;
20186 RTX_FRAME_RELATED_P (tmp2) = 1;
20187 XVECEXP (dwarf, 0, i + 1) = tmp1;
20188 XVECEXP (dwarf, 0, i + 2) = tmp2;
20189 i += 2;
20190 regno = regno2 + 1;
20191 }
20192 else
20193 regno++;
20194
20195 return;
20196 }
20197
20198 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20199 whenever possible, otherwise it emits single-word stores. The first store
20200 also allocates stack space for all saved registers, using pre-indexed
20201 addressing with writeback. All other stores use offset addressing. If no STRD
20202 can be emitted, this function emits a sequence of single-word stores,
20203 and not an STM as before, because single-word stores provide more
20204 scheduling freedom and can be turned into an STM by peephole optimizations. */
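/* As an illustrative sketch (assuming a mask of {r4, r5, r7}; not taken
   from actual compiler output), the stores emitted here correspond to:

       strd    r4, r5, [sp, #-12]!     @ first store allocates all 12 bytes
       str     r7, [sp, #8]            @ unpaired register uses offset addressing  */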
20205 static void
20206 arm_emit_strd_push (unsigned long saved_regs_mask)
20207 {
20208 int num_regs = 0;
20209 int i, j, dwarf_index = 0;
20210 int offset = 0;
20211 rtx dwarf = NULL_RTX;
20212 rtx insn = NULL_RTX;
20213 rtx tmp, mem;
20214
20215 /* TODO: More efficient code could be emitted by changing the
20216 layout, e.g., first push all pairs that can use STRD to keep the
20217 stack aligned, and then push all other registers. */
20218 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20219 if (saved_regs_mask & (1 << i))
20220 num_regs++;
20221
20222 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20223 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20224 gcc_assert (num_regs > 0);
20225
20226 /* Create sequence for DWARF info. */
20227 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20228
20229 /* For dwarf info, we generate an explicit stack update. */
20230 tmp = gen_rtx_SET (stack_pointer_rtx,
20231 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20234
20235 /* Save registers. */
20236 offset = - 4 * num_regs;
20237 j = 0;
20238 while (j <= LAST_ARM_REGNUM)
20239 if (saved_regs_mask & (1 << j))
20240 {
20241 if ((j % 2 == 0)
20242 && (saved_regs_mask & (1 << (j + 1))))
20243 {
20244 /* The current register and the next register form a register pair
20245 for which STRD can be generated. */
20246 if (offset < 0)
20247 {
20248 /* Allocate stack space for all saved registers. */
20249 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20250 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20251 mem = gen_frame_mem (DImode, tmp);
20252 offset = 0;
20253 }
20254 else if (offset > 0)
20255 mem = gen_frame_mem (DImode,
20256 plus_constant (Pmode,
20257 stack_pointer_rtx,
20258 offset));
20259 else
20260 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20261
20262 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20263 RTX_FRAME_RELATED_P (tmp) = 1;
20264 tmp = emit_insn (tmp);
20265
20266 /* Record the first store insn. */
20267 if (dwarf_index == 1)
20268 insn = tmp;
20269
20270 /* Generate dwarf info. */
20271 mem = gen_frame_mem (SImode,
20272 plus_constant (Pmode,
20273 stack_pointer_rtx,
20274 offset));
20275 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20276 RTX_FRAME_RELATED_P (tmp) = 1;
20277 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20278
20279 mem = gen_frame_mem (SImode,
20280 plus_constant (Pmode,
20281 stack_pointer_rtx,
20282 offset + 4));
20283 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20284 RTX_FRAME_RELATED_P (tmp) = 1;
20285 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20286
20287 offset += 8;
20288 j += 2;
20289 }
20290 else
20291 {
20292 /* Emit a single word store. */
20293 if (offset < 0)
20294 {
20295 /* Allocate stack space for all saved registers. */
20296 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20297 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20298 mem = gen_frame_mem (SImode, tmp);
20299 offset = 0;
20300 }
20301 else if (offset > 0)
20302 mem = gen_frame_mem (SImode,
20303 plus_constant (Pmode,
20304 stack_pointer_rtx,
20305 offset));
20306 else
20307 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20308
20309 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20310 RTX_FRAME_RELATED_P (tmp) = 1;
20311 tmp = emit_insn (tmp);
20312
20313 /* Record the first store insn. */
20314 if (dwarf_index == 1)
20315 insn = tmp;
20316
20317 /* Generate dwarf info. */
20318 mem = gen_frame_mem (SImode,
20319 plus_constant(Pmode,
20320 stack_pointer_rtx,
20321 offset));
20322 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20323 RTX_FRAME_RELATED_P (tmp) = 1;
20324 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20325
20326 offset += 4;
20327 j += 1;
20328 }
20329 }
20330 else
20331 j++;
20332
20333 /* Attach dwarf info to the first insn we generate. */
20334 gcc_assert (insn != NULL_RTX);
20335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20336 RTX_FRAME_RELATED_P (insn) = 1;
20337 }
20338
20339 /* Generate and emit an insn that we will recognize as a push_multi.
20340 Unfortunately, since this insn does not reflect very well the actual
20341 semantics of the operation, we need to annotate the insn for the benefit
20342 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20343 MASK for registers that should be annotated for DWARF2 frame unwind
20344 information. */
20345 static rtx
20346 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20347 {
20348 int num_regs = 0;
20349 int num_dwarf_regs = 0;
20350 int i, j;
20351 rtx par;
20352 rtx dwarf;
20353 int dwarf_par_index;
20354 rtx tmp, reg;
20355
20356 /* We don't record the PC in the dwarf frame information. */
20357 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20358
20359 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20360 {
20361 if (mask & (1 << i))
20362 num_regs++;
20363 if (dwarf_regs_mask & (1 << i))
20364 num_dwarf_regs++;
20365 }
20366
20367 gcc_assert (num_regs && num_regs <= 16);
20368 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20369
20370 /* For the body of the insn we are going to generate an UNSPEC in
20371 parallel with several USEs. This allows the insn to be recognized
20372 by the push_multi pattern in the arm.md file.
20373
20374 The body of the insn looks something like this:
20375
20376 (parallel [
20377 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20378 (const_int:SI <num>)))
20379 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20380 (use (reg:SI XX))
20381 (use (reg:SI YY))
20382 ...
20383 ])
20384
20385 For the frame note however, we try to be more explicit and actually
20386 show each register being stored into the stack frame, plus a (single)
20387 decrement of the stack pointer. We do it this way in order to be
20388 friendly to the stack unwinding code, which only wants to see a single
20389 stack decrement per instruction. The RTL we generate for the note looks
20390 something like this:
20391
20392 (sequence [
20393 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20394 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20395 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20396 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20397 ...
20398 ])
20399
20400 FIXME: In an ideal world the PRE_MODIFY would not exist and
20401 instead we'd have a parallel expression detailing all
20402 the stores to the various memory addresses so that debug
20403 information is more up-to-date. Remember, however, when rewriting
20404 this to take care of the constraints of the push instruction.
20405
20406 Note also that this has to be taken care of for the VFP registers.
20407
20408 For more see PR43399. */
20409
20410 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20411 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20412 dwarf_par_index = 1;
20413
20414 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20415 {
20416 if (mask & (1 << i))
20417 {
20418 reg = gen_rtx_REG (SImode, i);
20419
20420 XVECEXP (par, 0, 0)
20421 = gen_rtx_SET (gen_frame_mem
20422 (BLKmode,
20423 gen_rtx_PRE_MODIFY (Pmode,
20424 stack_pointer_rtx,
20425 plus_constant
20426 (Pmode, stack_pointer_rtx,
20427 -4 * num_regs))
20428 ),
20429 gen_rtx_UNSPEC (BLKmode,
20430 gen_rtvec (1, reg),
20431 UNSPEC_PUSH_MULT));
20432
20433 if (dwarf_regs_mask & (1 << i))
20434 {
20435 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20436 reg);
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20439 }
20440
20441 break;
20442 }
20443 }
20444
20445 for (j = 1, i++; j < num_regs; i++)
20446 {
20447 if (mask & (1 << i))
20448 {
20449 reg = gen_rtx_REG (SImode, i);
20450
20451 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20452
20453 if (dwarf_regs_mask & (1 << i))
20454 {
20455 tmp
20456 = gen_rtx_SET (gen_frame_mem
20457 (SImode,
20458 plus_constant (Pmode, stack_pointer_rtx,
20459 4 * j)),
20460 reg);
20461 RTX_FRAME_RELATED_P (tmp) = 1;
20462 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20463 }
20464
20465 j++;
20466 }
20467 }
20468
20469 par = emit_insn (par);
20470
20471 tmp = gen_rtx_SET (stack_pointer_rtx,
20472 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20473 RTX_FRAME_RELATED_P (tmp) = 1;
20474 XVECEXP (dwarf, 0, 0) = tmp;
20475
20476 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20477
20478 return par;
20479 }
20480
20481 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20482 SIZE is the offset to be adjusted.
20483 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20484 static void
20485 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20486 {
20487 rtx dwarf;
20488
20489 RTX_FRAME_RELATED_P (insn) = 1;
20490 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20491 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20492 }
20493
20494 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20495 SAVED_REGS_MASK shows which registers need to be restored.
20496
20497 Unfortunately, since this insn does not reflect very well the actual
20498 semantics of the operation, we need to annotate the insn for the benefit
20499 of DWARF2 frame unwind information. */
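/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, pc};
   not taken from actual compiler output), the PARALLEL built below
   corresponds to "pop {r4, r5, pc}" and looks roughly like:

       (parallel [(return)
                  (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
                  (set (reg:SI r4) (mem:SI (reg:SI sp)))
                  (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
                  (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])  */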
20500 static void
20501 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20502 {
20503 int num_regs = 0;
20504 int i, j;
20505 rtx par;
20506 rtx dwarf = NULL_RTX;
20507 rtx tmp, reg;
20508 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20509 int offset_adj;
20510 int emit_update;
20511
20512 offset_adj = return_in_pc ? 1 : 0;
20513 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20514 if (saved_regs_mask & (1 << i))
20515 num_regs++;
20516
20517 gcc_assert (num_regs && num_regs <= 16);
20518
20519 /* If SP is in the reglist, then we don't emit the SP update insn. */
20520 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20521
20522 /* The parallel needs to hold num_regs SETs
20523 and one SET for the stack update. */
20524 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20525
20526 if (return_in_pc)
20527 XVECEXP (par, 0, 0) = ret_rtx;
20528
20529 if (emit_update)
20530 {
20531 /* Increment the stack pointer, based on there being
20532 num_regs 4-byte registers to restore. */
20533 tmp = gen_rtx_SET (stack_pointer_rtx,
20534 plus_constant (Pmode,
20535 stack_pointer_rtx,
20536 4 * num_regs));
20537 RTX_FRAME_RELATED_P (tmp) = 1;
20538 XVECEXP (par, 0, offset_adj) = tmp;
20539 }
20540
20541 /* Now restore every reg, which may include PC. */
20542 for (j = 0, i = 0; j < num_regs; i++)
20543 if (saved_regs_mask & (1 << i))
20544 {
20545 reg = gen_rtx_REG (SImode, i);
20546 if ((num_regs == 1) && emit_update && !return_in_pc)
20547 {
20548 /* Emit single load with writeback. */
20549 tmp = gen_frame_mem (SImode,
20550 gen_rtx_POST_INC (Pmode,
20551 stack_pointer_rtx));
20552 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20553 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20554 return;
20555 }
20556
20557 tmp = gen_rtx_SET (reg,
20558 gen_frame_mem
20559 (SImode,
20560 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20561 RTX_FRAME_RELATED_P (tmp) = 1;
20562 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20563
20564 /* We need to maintain a sequence for DWARF info too. As dwarf info
20565 should not have PC, skip PC. */
20566 if (i != PC_REGNUM)
20567 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20568
20569 j++;
20570 }
20571
20572 if (return_in_pc)
20573 par = emit_jump_insn (par);
20574 else
20575 par = emit_insn (par);
20576
20577 REG_NOTES (par) = dwarf;
20578 if (!return_in_pc)
20579 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20580 stack_pointer_rtx, stack_pointer_rtx);
20581 }
20582
20583 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20584 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20585
20586 Unfortunately, since this insn does not reflect very well the actual
20587 semantics of the operation, we need to annotate the insn for the benefit
20588 of DWARF2 frame unwind information. */
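/* As an illustrative sketch (assuming three D-registers d8-d10 are restored
   with BASE_REG = sp; not taken from actual compiler output), the PARALLEL
   built below corresponds to "vldm sp!, {d8-d10}":

       (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 24)))
                  (set (reg:DF d8)  (mem:DF (reg:SI sp)))
                  (set (reg:DF d9)  (mem:DF (plus:SI (reg:SI sp) (const_int 8))))
                  (set (reg:DF d10) (mem:DF (plus:SI (reg:SI sp) (const_int 16))))])  */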
20589 static void
20590 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20591 {
20592 int i, j;
20593 rtx par;
20594 rtx dwarf = NULL_RTX;
20595 rtx tmp, reg;
20596
20597 gcc_assert (num_regs && num_regs <= 32);
20598
20599 /* Workaround ARM10 VFPr1 bug. */
20600 if (num_regs == 2 && !arm_arch6)
20601 {
20602 if (first_reg == 15)
20603 first_reg--;
20604
20605 num_regs++;
20606 }
20607
20608 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20609 there could be up to 32 D-registers to restore.
20610 If there are more than 16 D-registers, make two recursive calls,
20611 each of which emits one pop_multi instruction. */
20612 if (num_regs > 16)
20613 {
20614 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20615 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20616 return;
20617 }
20618
20619 /* The parallel needs to hold num_regs SETs
20620 and one SET for the stack update. */
20621 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20622
20623 /* Increment the stack pointer, based on there being
20624 num_regs 8-byte registers to restore. */
20625 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20626 RTX_FRAME_RELATED_P (tmp) = 1;
20627 XVECEXP (par, 0, 0) = tmp;
20628
20629 /* Now show every reg that will be restored, using a SET for each. */
20630 for (j = 0, i=first_reg; j < num_regs; i += 2)
20631 {
20632 reg = gen_rtx_REG (DFmode, i);
20633
20634 tmp = gen_rtx_SET (reg,
20635 gen_frame_mem
20636 (DFmode,
20637 plus_constant (Pmode, base_reg, 8 * j)));
20638 RTX_FRAME_RELATED_P (tmp) = 1;
20639 XVECEXP (par, 0, j + 1) = tmp;
20640
20641 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20642
20643 j++;
20644 }
20645
20646 par = emit_insn (par);
20647 REG_NOTES (par) = dwarf;
20648
20649 /* Make sure the CFA doesn't end up based on IP_REGNUM, to allow unwinding from FP. */
20650 if (REGNO (base_reg) == IP_REGNUM)
20651 {
20652 RTX_FRAME_RELATED_P (par) = 1;
20653 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20654 }
20655 else
20656 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20657 base_reg, base_reg);
20658 }
20659
20660 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20661 even number of registers is being popped, one LDRD pattern is created for
20662 each register pair. If an odd number of registers is popped, the last register
20663 is loaded using an LDR pattern. */
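/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, r6},
   i.e. an odd number of registers and no PC; not taken from actual compiler
   output), the sequence emitted below corresponds to:

       ldrd    r4, r5, [sp]            @ one LDRD per register pair
       add     sp, sp, #8              @ single stack adjustment for the pairs
       ldr     r6, [sp], #4            @ odd register popped with post-increment  */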
20664 static void
20665 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20666 {
20667 int num_regs = 0;
20668 int i, j;
20669 rtx par = NULL_RTX;
20670 rtx dwarf = NULL_RTX;
20671 rtx tmp, reg, tmp1;
20672 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20673
20674 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20675 if (saved_regs_mask & (1 << i))
20676 num_regs++;
20677
20678 gcc_assert (num_regs && num_regs <= 16);
20679
20680 /* We cannot generate LDRD for PC, so reduce the count if PC is
20681 to be popped. If num_regs was even, it now becomes odd,
20682 and we can generate a pop with PC. If num_regs was odd, it is
20683 now even, and an LDR with return can be generated for PC. */
20684 if (return_in_pc)
20685 num_regs--;
20686
20687 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20688
20689 /* Var j iterates over all the registers, gathering those in
20690 saved_regs_mask. Var i gives the index of each saved register in the stack frame.
20691 A PARALLEL RTX of a register pair is created here, so that the pattern for
20692 LDRD can be matched. As PC is always the last register to be popped, and
20693 we have already decremented num_regs if PC is present, we don't have to worry
20694 about PC in this loop. */
20695 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20696 if (saved_regs_mask & (1 << j))
20697 {
20698 /* Create RTX for memory load. */
20699 reg = gen_rtx_REG (SImode, j);
20700 tmp = gen_rtx_SET (reg,
20701 gen_frame_mem (SImode,
20702 plus_constant (Pmode,
20703 stack_pointer_rtx, 4 * i)));
20704 RTX_FRAME_RELATED_P (tmp) = 1;
20705
20706 if (i % 2 == 0)
20707 {
20708 /* When saved-register index (i) is even, the RTX to be emitted is
20709 yet to be created. Hence create it first. The LDRD pattern we
20710 are generating is :
20711 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20712 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20713 where target registers need not be consecutive. */
20714 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20715 dwarf = NULL_RTX;
20716 }
20717
20718 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20719 added as the 0th element and if i is odd, reg_i is added as the 1st element
20720 of the LDRD pattern shown above. */
20721 XVECEXP (par, 0, (i % 2)) = tmp;
20722 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20723
20724 if ((i % 2) == 1)
20725 {
20726 /* When the saved-register index (i) is odd, the RTXs for both registers
20727 to be loaded have been generated in the LDRD pattern given above, and the
20728 pattern can be emitted now. */
20729 par = emit_insn (par);
20730 REG_NOTES (par) = dwarf;
20731 RTX_FRAME_RELATED_P (par) = 1;
20732 }
20733
20734 i++;
20735 }
20736
20737 /* If the number of registers popped is odd and return_in_pc is false, or the
20738 number of registers is even and return_in_pc is true, the last register is
20739 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20740 then emit an LDR with post-increment. */
20741
20742 /* Increment the stack pointer, based on there being
20743 num_regs 4-byte registers to restore. */
20744 tmp = gen_rtx_SET (stack_pointer_rtx,
20745 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20746 RTX_FRAME_RELATED_P (tmp) = 1;
20747 tmp = emit_insn (tmp);
20748 if (!return_in_pc)
20749 {
20750 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20751 stack_pointer_rtx, stack_pointer_rtx);
20752 }
20753
20754 dwarf = NULL_RTX;
20755
20756 if (((num_regs % 2) == 1 && !return_in_pc)
20757 || ((num_regs % 2) == 0 && return_in_pc))
20758 {
20759 /* Scan for the single register to be popped. Skip until the saved
20760 register is found. */
20761 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20762
20763 /* Gen LDR with post increment here. */
20764 tmp1 = gen_rtx_MEM (SImode,
20765 gen_rtx_POST_INC (SImode,
20766 stack_pointer_rtx));
20767 set_mem_alias_set (tmp1, get_frame_alias_set ());
20768
20769 reg = gen_rtx_REG (SImode, j);
20770 tmp = gen_rtx_SET (reg, tmp1);
20771 RTX_FRAME_RELATED_P (tmp) = 1;
20772 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20773
20774 if (return_in_pc)
20775 {
20776 /* If return_in_pc, j must be PC_REGNUM. */
20777 gcc_assert (j == PC_REGNUM);
20778 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20779 XVECEXP (par, 0, 0) = ret_rtx;
20780 XVECEXP (par, 0, 1) = tmp;
20781 par = emit_jump_insn (par);
20782 }
20783 else
20784 {
20785 par = emit_insn (tmp);
20786 REG_NOTES (par) = dwarf;
20787 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20788 stack_pointer_rtx, stack_pointer_rtx);
20789 }
20790
20791 }
20792 else if ((num_regs % 2) == 1 && return_in_pc)
20793 {
20794 /* There are 2 registers to be popped. So, generate the pattern
20795 pop_multiple_with_stack_update_and_return to pop in PC. */
20796 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20797 }
20798
20799 return;
20800 }
20801
20802 /* LDRD in ARM mode needs consecutive registers as operands. This function
20803 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20804 offset addressing and then generates one separate stack update. This provides
20805 more scheduling freedom, compared to writeback on every load. However,
20806 if the function returns using load into PC directly
20807 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20808 before the last load. TODO: Add a peephole optimization to recognize
20809 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20810 a peephole optimization to merge the load at stack-offset zero
20811 with the stack update instruction using load with writeback
20812 in post-index addressing mode. */
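/* As an illustrative sketch (assuming SAVED_REGS_MASK covers {r4, r5, r6, pc};
   not taken from actual compiler output), the sequence emitted below
   corresponds to:

       ldrd    r4, r5, [sp]            @ consecutive pair via LDRD
       ldr     r6, [sp, #8]            @ single-word load, offset addressing
       add     sp, sp, #12             @ one separate stack update
       ldr     pc, [sp], #4            @ return by loading PC last  */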
20813 static void
20814 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20815 {
20816 int j = 0;
20817 int offset = 0;
20818 rtx par = NULL_RTX;
20819 rtx dwarf = NULL_RTX;
20820 rtx tmp, mem;
20821
20822 /* Restore saved registers. */
20823 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20824 j = 0;
20825 while (j <= LAST_ARM_REGNUM)
20826 if (saved_regs_mask & (1 << j))
20827 {
20828 if ((j % 2) == 0
20829 && (saved_regs_mask & (1 << (j + 1)))
20830 && (j + 1) != PC_REGNUM)
20831 {
20832 /* The current register and the next register form a register pair for which
20833 LDRD can be generated. PC is always the last register popped, and
20834 we handle it separately. */
20835 if (offset > 0)
20836 mem = gen_frame_mem (DImode,
20837 plus_constant (Pmode,
20838 stack_pointer_rtx,
20839 offset));
20840 else
20841 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20842
20843 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20844 tmp = emit_insn (tmp);
20845 RTX_FRAME_RELATED_P (tmp) = 1;
20846
20847 /* Generate dwarf info. */
20848
20849 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20850 gen_rtx_REG (SImode, j),
20851 NULL_RTX);
20852 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20853 gen_rtx_REG (SImode, j + 1),
20854 dwarf);
20855
20856 REG_NOTES (tmp) = dwarf;
20857
20858 offset += 8;
20859 j += 2;
20860 }
20861 else if (j != PC_REGNUM)
20862 {
20863 /* Emit a single word load. */
20864 if (offset > 0)
20865 mem = gen_frame_mem (SImode,
20866 plus_constant (Pmode,
20867 stack_pointer_rtx,
20868 offset));
20869 else
20870 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20871
20872 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20873 tmp = emit_insn (tmp);
20874 RTX_FRAME_RELATED_P (tmp) = 1;
20875
20876 /* Generate dwarf info. */
20877 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20878 gen_rtx_REG (SImode, j),
20879 NULL_RTX);
20880
20881 offset += 4;
20882 j += 1;
20883 }
20884 else /* j == PC_REGNUM */
20885 j++;
20886 }
20887 else
20888 j++;
20889
20890 /* Update the stack. */
20891 if (offset > 0)
20892 {
20893 tmp = gen_rtx_SET (stack_pointer_rtx,
20894 plus_constant (Pmode,
20895 stack_pointer_rtx,
20896 offset));
20897 tmp = emit_insn (tmp);
20898 arm_add_cfa_adjust_cfa_note (tmp, offset,
20899 stack_pointer_rtx, stack_pointer_rtx);
20900 offset = 0;
20901 }
20902
20903 if (saved_regs_mask & (1 << PC_REGNUM))
20904 {
20905 /* Only PC is to be popped. */
20906 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20907 XVECEXP (par, 0, 0) = ret_rtx;
20908 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20909 gen_frame_mem (SImode,
20910 gen_rtx_POST_INC (SImode,
20911 stack_pointer_rtx)));
20912 RTX_FRAME_RELATED_P (tmp) = 1;
20913 XVECEXP (par, 0, 1) = tmp;
20914 par = emit_jump_insn (par);
20915
20916 /* Generate dwarf info. */
20917 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20918 gen_rtx_REG (SImode, PC_REGNUM),
20919 NULL_RTX);
20920 REG_NOTES (par) = dwarf;
20921 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20922 stack_pointer_rtx, stack_pointer_rtx);
20923 }
20924 }
20925
20926 /* Calculate the size of the return value that is passed in registers. */
20927 static unsigned
20928 arm_size_return_regs (void)
20929 {
20930 machine_mode mode;
20931
20932 if (crtl->return_rtx != 0)
20933 mode = GET_MODE (crtl->return_rtx);
20934 else
20935 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20936
20937 return GET_MODE_SIZE (mode);
20938 }
20939
20940 /* Return true if the current function needs to save/restore LR. */
20941 static bool
20942 thumb_force_lr_save (void)
20943 {
20944 return !cfun->machine->lr_save_eliminated
20945 && (!crtl->is_leaf
20946 || thumb_far_jump_used_p ()
20947 || df_regs_ever_live_p (LR_REGNUM));
20948 }
20949
20950 /* We do not know whether r3 will be available, because
20951 there is an indirect tail call happening in this
20952 particular case. */
20953 static bool
20954 is_indirect_tailcall_p (rtx call)
20955 {
20956 rtx pat = PATTERN (call);
20957
20958 /* Indirect tail call. */
20959 pat = XVECEXP (pat, 0, 0);
20960 if (GET_CODE (pat) == SET)
20961 pat = SET_SRC (pat);
20962
20963 pat = XEXP (XEXP (pat, 0), 0);
20964 return REG_P (pat);
20965 }
20966
20967 /* Return true if r3 is used by any of the tail call insns in the
20968 current function. */
20969 static bool
20970 any_sibcall_could_use_r3 (void)
20971 {
20972 edge_iterator ei;
20973 edge e;
20974
20975 if (!crtl->tail_call_emit)
20976 return false;
20977 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20978 if (e->flags & EDGE_SIBCALL)
20979 {
20980 rtx_insn *call = BB_END (e->src);
20981 if (!CALL_P (call))
20982 call = prev_nonnote_nondebug_insn (call);
20983 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20984 if (find_regno_fusage (call, USE, 3)
20985 || is_indirect_tailcall_p (call))
20986 return true;
20987 }
20988 return false;
20989 }
20990
20991
20992 /* Compute the distance from register FROM to register TO.
20993 These can be the arg pointer (26), the soft frame pointer (25),
20994 the stack pointer (13) or the hard frame pointer (11).
20995 In thumb mode r7 is used as the soft frame pointer, if needed.
20996 Typical stack layout looks like this:
20997
20998 old stack pointer -> | |
20999 ----
21000 | | \
21001 | | saved arguments for
21002 | | vararg functions
21003 | | /
21004 --
21005 hard FP & arg pointer -> | | \
21006 | | stack
21007 | | frame
21008 | | /
21009 --
21010 | | \
21011 | | call saved
21012 | | registers
21013 soft frame pointer -> | | /
21014 --
21015 | | \
21016 | | local
21017 | | variables
21018 locals base pointer -> | | /
21019 --
21020 | | \
21021 | | outgoing
21022 | | arguments
21023 current stack pointer -> | | /
21024 --
21025
21026 For a given function some or all of these stack components
21027 may not be needed, giving rise to the possibility of
21028 eliminating some of the registers.
21029
21030 The values returned by this function must reflect the behavior
21031 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21032
21033 The sign of the number returned reflects the direction of stack
21034 growth, so the values are positive for all eliminations except
21035 from the soft frame pointer to the hard frame pointer.
21036
21037 SFP may point just inside the local variables block to ensure correct
21038 alignment. */
21039
21040
21041 /* Return cached stack offsets. */
21042
21043 static arm_stack_offsets *
21044 arm_get_frame_offsets (void)
21045 {
21046 struct arm_stack_offsets *offsets;
21047
21048 offsets = &cfun->machine->stack_offsets;
21049
21050 return offsets;
21051 }
21052
21053
21054 /* Calculate stack offsets. These are used to calculate register elimination
21055 offsets and in prologue/epilogue code. Also calculates which registers
21056 should be saved. */
21057
21058 static void
21059 arm_compute_frame_layout (void)
21060 {
21061 struct arm_stack_offsets *offsets;
21062 unsigned long func_type;
21063 int saved;
21064 int core_saved;
21065 HOST_WIDE_INT frame_size;
21066 int i;
21067
21068 offsets = &cfun->machine->stack_offsets;
21069
21070 /* Initially this is the size of the local variables. It will be translated
21071 into an offset once we have determined the size of the preceding data. */
21072 frame_size = ROUND_UP_WORD (get_frame_size ());
21073
21074 /* Space for variadic functions. */
21075 offsets->saved_args = crtl->args.pretend_args_size;
21076
21077 /* In Thumb mode this is incorrect, but never used. */
21078 offsets->frame
21079 = (offsets->saved_args
21080 + arm_compute_static_chain_stack_bytes ()
21081 + (frame_pointer_needed ? 4 : 0));
21082
21083 if (TARGET_32BIT)
21084 {
21085 unsigned int regno;
21086
21087 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21088 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21089 saved = core_saved;
21090
21091 /* We know that SP will be doubleword aligned on entry, and we must
21092 preserve that condition at any subroutine call. We also require the
21093 soft frame pointer to be doubleword aligned. */
21094
21095 if (TARGET_REALLY_IWMMXT)
21096 {
21097 /* Check for the call-saved iWMMXt registers. */
21098 for (regno = FIRST_IWMMXT_REGNUM;
21099 regno <= LAST_IWMMXT_REGNUM;
21100 regno++)
21101 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21102 saved += 8;
21103 }
21104
21105 func_type = arm_current_func_type ();
21106 /* Space for saved VFP registers. */
21107 if (! IS_VOLATILE (func_type)
21108 && TARGET_HARD_FLOAT)
21109 saved += arm_get_vfp_saved_size ();
21110 }
21111 else /* TARGET_THUMB1 */
21112 {
21113 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21114 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21115 saved = core_saved;
21116 if (TARGET_BACKTRACE)
21117 saved += 16;
21118 }
21119
21120 /* Saved registers include the stack frame. */
21121 offsets->saved_regs
21122 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21123 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21124
21125 /* A leaf function does not need any stack alignment if it has nothing
21126 on the stack. */
21127 if (crtl->is_leaf && frame_size == 0
21128 /* However if it calls alloca(), we have a dynamically allocated
21129 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21130 && ! cfun->calls_alloca)
21131 {
21132 offsets->outgoing_args = offsets->soft_frame;
21133 offsets->locals_base = offsets->soft_frame;
21134 return;
21135 }
21136
21137 /* Ensure SFP has the correct alignment. */
21138 if (ARM_DOUBLEWORD_ALIGN
21139 && (offsets->soft_frame & 7))
21140 {
21141 offsets->soft_frame += 4;
21142 /* Try to align stack by pushing an extra reg. Don't bother doing this
21143 when there is a stack frame as the alignment will be rolled into
21144 the normal stack adjustment. */
21145 if (frame_size + crtl->outgoing_args_size == 0)
21146 {
21147 int reg = -1;
21148
21149 /* Register r3 is caller-saved. Normally it does not need to be
21150 saved on entry by the prologue. However if we choose to save
21151 it for padding then we may confuse the compiler into thinking
21152 a prologue sequence is required when in fact it is not. This
21153 will occur when shrink-wrapping if r3 is used as a scratch
21154 register and there are no other callee-saved writes.
21155
21156 This situation can be avoided when other callee-saved registers
21157 are available and r3 is not mandatory if we choose a callee-saved
21158 register for padding. */
21159 bool prefer_callee_reg_p = false;
21160
21161 /* If it is safe to use r3, then do so. This sometimes
21162 generates better code on Thumb-2 by avoiding the need to
21163 use 32-bit push/pop instructions. */
21164 if (! any_sibcall_could_use_r3 ()
21165 && arm_size_return_regs () <= 12
21166 && (offsets->saved_regs_mask & (1 << 3)) == 0
21167 && (TARGET_THUMB2
21168 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21169 {
21170 reg = 3;
21171 if (!TARGET_THUMB2)
21172 prefer_callee_reg_p = true;
21173 }
21174 if (reg == -1
21175 || prefer_callee_reg_p)
21176 {
21177 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21178 {
21179 /* Avoid fixed registers; they may be changed at
21180 arbitrary times so it's unsafe to restore them
21181 during the epilogue. */
21182 if (!fixed_regs[i]
21183 && (offsets->saved_regs_mask & (1 << i)) == 0)
21184 {
21185 reg = i;
21186 break;
21187 }
21188 }
21189 }
21190
21191 if (reg != -1)
21192 {
21193 offsets->saved_regs += 4;
21194 offsets->saved_regs_mask |= (1 << reg);
21195 }
21196 }
21197 }
21198
21199 offsets->locals_base = offsets->soft_frame + frame_size;
21200 offsets->outgoing_args = (offsets->locals_base
21201 + crtl->outgoing_args_size);
21202
21203 if (ARM_DOUBLEWORD_ALIGN)
21204 {
21205 /* Ensure SP remains doubleword aligned. */
21206 if (offsets->outgoing_args & 7)
21207 offsets->outgoing_args += 4;
21208 gcc_assert (!(offsets->outgoing_args & 7));
21209 }
21210 }
21211
21212
21213 /* Calculate the relative offsets for the different stack pointers. Positive
21214 offsets are in the direction of stack growth. */
21215
21216 HOST_WIDE_INT
21217 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21218 {
21219 arm_stack_offsets *offsets;
21220
21221 offsets = arm_get_frame_offsets ();
21222
21223 /* OK, now we have enough information to compute the distances.
21224 There must be an entry in these switch tables for each pair
21225 of registers in ELIMINABLE_REGS, even if some of the entries
21226 seem to be redundant or useless. */
21227 switch (from)
21228 {
21229 case ARG_POINTER_REGNUM:
21230 switch (to)
21231 {
21232 case THUMB_HARD_FRAME_POINTER_REGNUM:
21233 return 0;
21234
21235 case FRAME_POINTER_REGNUM:
21236 /* This is the reverse of the soft frame pointer
21237 to hard frame pointer elimination below. */
21238 return offsets->soft_frame - offsets->saved_args;
21239
21240 case ARM_HARD_FRAME_POINTER_REGNUM:
21241 /* This is only non-zero in the case where the static chain register
21242 is stored above the frame. */
21243 return offsets->frame - offsets->saved_args - 4;
21244
21245 case STACK_POINTER_REGNUM:
21246 /* If nothing has been pushed on the stack at all
21247 then this will return -4. This *is* correct! */
21248 return offsets->outgoing_args - (offsets->saved_args + 4);
21249
21250 default:
21251 gcc_unreachable ();
21252 }
21253 gcc_unreachable ();
21254
21255 case FRAME_POINTER_REGNUM:
21256 switch (to)
21257 {
21258 case THUMB_HARD_FRAME_POINTER_REGNUM:
21259 return 0;
21260
21261 case ARM_HARD_FRAME_POINTER_REGNUM:
21262 /* The hard frame pointer points to the top entry in the
21263 stack frame. The soft frame pointer points to the bottom entry
21264 in the stack frame. If there is no stack frame at all,
21265 then they are identical. */
21266
21267 return offsets->frame - offsets->soft_frame;
21268
21269 case STACK_POINTER_REGNUM:
21270 return offsets->outgoing_args - offsets->soft_frame;
21271
21272 default:
21273 gcc_unreachable ();
21274 }
21275 gcc_unreachable ();
21276
21277 default:
21278 /* You cannot eliminate from the stack pointer.
21279 In theory you could eliminate from the hard frame
21280 pointer to the stack pointer, but this will never
21281 happen, since if a stack frame is not needed the
21282 hard frame pointer will never be used. */
21283 gcc_unreachable ();
21284 }
21285 }
21286
21287 /* Given FROM and TO register numbers, say whether this elimination is
21288 allowed. Frame pointer elimination is automatically handled.
21289
21290 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21291 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21292 pointer, we must eliminate FRAME_POINTER_REGNUM into
21293 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21294 ARG_POINTER_REGNUM. */
21295
21296 bool
21297 arm_can_eliminate (const int from, const int to)
21298 {
21299 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21300 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21301 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21302 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21303 true);
21304 }
21305
21306 /* Emit RTL to save coprocessor registers on function entry. Returns the
21307 number of bytes pushed. */
21308
21309 static int
21310 arm_save_coproc_regs (void)
21311 {
21312 int saved_size = 0;
21313 unsigned reg;
21314 unsigned start_reg;
21315 rtx insn;
21316
21317 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21318 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21319 {
21320 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21321 insn = gen_rtx_MEM (V2SImode, insn);
21322 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21323 RTX_FRAME_RELATED_P (insn) = 1;
21324 saved_size += 8;
21325 }
21326
21327 if (TARGET_HARD_FLOAT)
21328 {
21329 start_reg = FIRST_VFP_REGNUM;
21330
21331 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21332 {
21333 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21334 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21335 {
21336 if (start_reg != reg)
21337 saved_size += vfp_emit_fstmd (start_reg,
21338 (reg - start_reg) / 2);
21339 start_reg = reg + 2;
21340 }
21341 }
21342 if (start_reg != reg)
21343 saved_size += vfp_emit_fstmd (start_reg,
21344 (reg - start_reg) / 2);
21345 }
21346 return saved_size;
21347 }
21348
21349
21350 /* Set the Thumb frame pointer from the stack pointer. */
21351
21352 static void
21353 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21354 {
21355 HOST_WIDE_INT amount;
21356 rtx insn, dwarf;
21357
21358 amount = offsets->outgoing_args - offsets->locals_base;
21359 if (amount < 1024)
21360 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21361 stack_pointer_rtx, GEN_INT (amount)));
21362 else
21363 {
21364 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21365 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21366 expects the first two operands to be the same. */
21367 if (TARGET_THUMB2)
21368 {
21369 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21370 stack_pointer_rtx,
21371 hard_frame_pointer_rtx));
21372 }
21373 else
21374 {
21375 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21376 hard_frame_pointer_rtx,
21377 stack_pointer_rtx));
21378 }
21379 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21380 plus_constant (Pmode, stack_pointer_rtx, amount));
21381 RTX_FRAME_RELATED_P (dwarf) = 1;
21382 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21383 }
21384
21385 RTX_FRAME_RELATED_P (insn) = 1;
21386 }
21387
21388 struct scratch_reg {
21389 rtx reg;
21390 bool saved;
21391 };
21392
21393 /* Return a short-lived scratch register for use as a 2nd scratch register on
21394 function entry after the registers are saved in the prologue. This register
21395 must be released by means of release_scratch_register_on_entry. IP is not
21396 considered since it is always used as the 1st scratch register if available.
21397
21398 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21399 mask of live registers. */
21400
21401 static void
21402 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21403 unsigned long live_regs)
21404 {
21405 int regno = -1;
21406
21407 sr->saved = false;
21408
21409 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21410 regno = LR_REGNUM;
21411 else
21412 {
21413 unsigned int i;
21414
21415 for (i = 4; i < 11; i++)
21416 if (regno1 != i && (live_regs & (1 << i)) != 0)
21417 {
21418 regno = i;
21419 break;
21420 }
21421
21422 if (regno < 0)
21423 {
21424 /* If IP is used as the 1st scratch register for a nested function,
21425 then either r3 wasn't available or is used to preserve IP. */
21426 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21427 regno1 = 3;
21428 regno = (regno1 == 3 ? 2 : 3);
21429 sr->saved
21430 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21431 regno);
21432 }
21433 }
21434
21435 sr->reg = gen_rtx_REG (SImode, regno);
21436 if (sr->saved)
21437 {
21438 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21439 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21440 rtx x = gen_rtx_SET (stack_pointer_rtx,
21441 plus_constant (Pmode, stack_pointer_rtx, -4));
21442 RTX_FRAME_RELATED_P (insn) = 1;
21443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21444 }
21445 }
21446
21447 /* Release a scratch register obtained from the preceding function. */
21448
21449 static void
21450 release_scratch_register_on_entry (struct scratch_reg *sr)
21451 {
21452 if (sr->saved)
21453 {
21454 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21455 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21456 rtx x = gen_rtx_SET (stack_pointer_rtx,
21457 plus_constant (Pmode, stack_pointer_rtx, 4));
21458 RTX_FRAME_RELATED_P (insn) = 1;
21459 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21460 }
21461 }
21462
21463 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21464
21465 #if PROBE_INTERVAL > 4096
21466 #error Cannot use indexed addressing mode for stack probing
21467 #endif
21468
21469 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21470 inclusive. These are offsets from the current stack pointer. REGNO1
21471 is the index number of the 1st scratch register and LIVE_REGS is the
21472 mask of live registers. */
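/* As a worked example (assuming FIRST = 4096, SIZE = 1024 and
   PROBE_INTERVAL = 4096): the "small size" case below computes
   REG1 = SP - (4096 + 4096) and probes at REG1 + (4096 - 1024),
   i.e. at SP - 5120 = SP - (FIRST + SIZE), so a single probe covers
   the whole range.  */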
21473
21474 static void
21475 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21476 unsigned int regno1, unsigned long live_regs)
21477 {
21478 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21479
21480 /* See if we have a constant small number of probes to generate. If so,
21481 that's the easy case. */
21482 if (size <= PROBE_INTERVAL)
21483 {
21484 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21485 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21486 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21487 }
21488
21489 /* The run-time loop is made up of 10 insns in the generic case while the
21490 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21491 else if (size <= 5 * PROBE_INTERVAL)
21492 {
21493 HOST_WIDE_INT i, rem;
21494
21495 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21496 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21497 emit_stack_probe (reg1);
21498
21499 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21500 N * PROBE_INTERVAL exceeds SIZE. If only two probes are needed, this will not
21501 generate any code. Then probe at FIRST + SIZE. */
21502 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21503 {
21504 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21505 emit_stack_probe (reg1);
21506 }
21507
21508 rem = size - (i - PROBE_INTERVAL);
21509 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21510 {
21511 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21512 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21513 }
21514 else
21515 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21516 }
21517
21518 /* Otherwise, do the same as above, but in a loop. Note that we must be
21519 extra careful with variables wrapping around because we might be at
21520 the very top (or the very bottom) of the address space and we have
21521 to be able to handle this case properly; in particular, we use an
21522 equality test for the loop condition. */
21523 else
21524 {
21525 HOST_WIDE_INT rounded_size;
21526 struct scratch_reg sr;
21527
21528 get_scratch_register_on_entry (&sr, regno1, live_regs);
21529
21530 emit_move_insn (reg1, GEN_INT (first));
21531
21532
21533 /* Step 1: round SIZE to the previous multiple of the interval. */
21534
21535 rounded_size = size & -PROBE_INTERVAL;
21536 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21537
21538
21539 /* Step 2: compute initial and final value of the loop counter. */
21540
21541 /* TEST_ADDR = SP + FIRST. */
21542 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21543
21544 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21545 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21546
21547
21548 /* Step 3: the loop
21549
21550 do
21551 {
21552 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21553 probe at TEST_ADDR
21554 }
21555 while (TEST_ADDR != LAST_ADDR)
21556
21557 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21558 until N * PROBE_INTERVAL is equal to ROUNDED_SIZE. */
21559
21560 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21561
21562
21563 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21564 that SIZE is equal to ROUNDED_SIZE. */
21565
21566 if (size != rounded_size)
21567 {
21568 HOST_WIDE_INT rem = size - rounded_size;
21569
21570 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21571 {
21572 emit_set_insn (sr.reg,
21573 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21574 emit_stack_probe (plus_constant (Pmode, sr.reg,
21575 PROBE_INTERVAL - rem));
21576 }
21577 else
21578 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21579 }
21580
21581 release_scratch_register_on_entry (&sr);
21582 }
21583
21584 /* Make sure nothing is scheduled before we are done. */
21585 emit_insn (gen_blockage ());
21586 }
21587
21588 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21589 absolute addresses. */
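/* As a sketch, assuming the default 4 kB probe interval, the loop emitted
   below looks like this (register numbers and the label are illustrative):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
*/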
21590
21591 const char *
21592 output_probe_stack_range (rtx reg1, rtx reg2)
21593 {
21594 static int labelno = 0;
21595 char loop_lab[32];
21596 rtx xops[2];
21597
21598 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21599
21600 /* Loop. */
21601 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21602
21603 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21604 xops[0] = reg1;
21605 xops[1] = GEN_INT (PROBE_INTERVAL);
21606 output_asm_insn ("sub\t%0, %0, %1", xops);
21607
21608 /* Probe at TEST_ADDR. */
21609 output_asm_insn ("str\tr0, [%0, #0]", xops);
21610
21611 /* Test if TEST_ADDR == LAST_ADDR. */
21612 xops[1] = reg2;
21613 output_asm_insn ("cmp\t%0, %1", xops);
21614
21615 /* Branch. */
21616 fputs ("\tbne\t", asm_out_file);
21617 assemble_name_raw (asm_out_file, loop_lab);
21618 fputc ('\n', asm_out_file);
21619
21620 return "";
21621 }
21622
21623 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21624 function. */
21625 void
21626 arm_expand_prologue (void)
21627 {
21628 rtx amount;
21629 rtx insn;
21630 rtx ip_rtx;
21631 unsigned long live_regs_mask;
21632 unsigned long func_type;
21633 int fp_offset = 0;
21634 int saved_pretend_args = 0;
21635 int saved_regs = 0;
21636 unsigned HOST_WIDE_INT args_to_push;
21637 HOST_WIDE_INT size;
21638 arm_stack_offsets *offsets;
21639 bool clobber_ip;
21640
21641 func_type = arm_current_func_type ();
21642
21643 /* Naked functions don't have prologues. */
21644 if (IS_NAKED (func_type))
21645 {
21646 if (flag_stack_usage_info)
21647 current_function_static_stack_size = 0;
21648 return;
21649 }
21650
21651 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21652 args_to_push = crtl->args.pretend_args_size;
21653
21654 /* Compute which registers we will have to save onto the stack. */
21655 offsets = arm_get_frame_offsets ();
21656 live_regs_mask = offsets->saved_regs_mask;
21657
21658 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21659
21660 if (IS_STACKALIGN (func_type))
21661 {
21662 rtx r0, r1;
21663
21664 /* Handle a word-aligned stack pointer. We generate the following:
21665
21666 mov r0, sp
21667 bic r1, r0, #7
21668 mov sp, r1
21669 <save and restore r0 in normal prologue/epilogue>
21670 mov sp, r0
21671 bx lr
21672
21673 The unwinder doesn't need to know about the stack realignment.
21674 Just tell it we saved SP in r0. */
21675 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21676
21677 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21678 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21679
21680 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21681 RTX_FRAME_RELATED_P (insn) = 1;
21682 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21683
21684 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21685
21686 /* ??? The CFA changes here, which may cause GDB to conclude that it
21687 has entered a different function. That said, the unwind info is
21688 correct, individually, before and after this instruction because
21689 we've described the save of SP, which will override the default
21690 handling of SP as restoring from the CFA. */
21691 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21692 }
21693
21694 /* Let's compute the static_chain_stack_bytes required and store it. Right
21695 now the value must be -1 as stored by arm_init_machine_status (). */
21696 cfun->machine->static_chain_stack_bytes
21697 = arm_compute_static_chain_stack_bytes ();
21698
21699 /* The static chain register is the same as the IP register. If it is
21700 clobbered when creating the frame, we need to save and restore it. */
21701 clobber_ip = IS_NESTED (func_type)
21702 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21703 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21704 || flag_stack_clash_protection)
21705 && !df_regs_ever_live_p (LR_REGNUM)
21706 && arm_r3_live_at_start_p ()));
21707
21708 /* Find somewhere to store IP whilst the frame is being created.
21709 We try the following places in order:
21710
21711 1. The last argument register r3 if it is available.
21712 2. A slot on the stack above the frame if there are no
21713 arguments to push onto the stack.
21714 3. Register r3 again, after pushing the argument registers
21715 onto the stack, if this is a varargs function.
21716 4. The last slot on the stack created for the arguments to
21717 push, if this isn't a varargs function.
21718
21719 Note - we only need to tell the dwarf2 backend about the SP
21720 adjustment in the second variant; the static chain register
21721 doesn't need to be unwound, as it doesn't contain a value
21722 inherited from the caller. */
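/* As an illustration of the second case above, the store emitted is a
   pre-decrement "str ip, [sp, #-4]!" whose unwind note only records the
   4-byte SP adjustment (a sketch of the code below, not extra behaviour).  */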
21723 if (clobber_ip)
21724 {
21725 if (!arm_r3_live_at_start_p ())
21726 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21727 else if (args_to_push == 0)
21728 {
21729 rtx addr, dwarf;
21730
21731 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21732 saved_regs += 4;
21733
21734 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21735 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21736 fp_offset = 4;
21737
21738 /* Just tell the dwarf backend that we adjusted SP. */
21739 dwarf = gen_rtx_SET (stack_pointer_rtx,
21740 plus_constant (Pmode, stack_pointer_rtx,
21741 -fp_offset));
21742 RTX_FRAME_RELATED_P (insn) = 1;
21743 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21744 }
21745 else
21746 {
21747 /* Store the args on the stack. */
21748 if (cfun->machine->uses_anonymous_args)
21749 {
21750 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21751 (0xf0 >> (args_to_push / 4)) & 0xf);
21752 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21753 saved_pretend_args = 1;
21754 }
21755 else
21756 {
21757 rtx addr, dwarf;
21758
21759 if (args_to_push == 4)
21760 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21761 else
21762 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21763 plus_constant (Pmode,
21764 stack_pointer_rtx,
21765 -args_to_push));
21766
21767 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21768
21769 /* Just tell the dwarf backend that we adjusted SP. */
21770 dwarf = gen_rtx_SET (stack_pointer_rtx,
21771 plus_constant (Pmode, stack_pointer_rtx,
21772 -args_to_push));
21773 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21774 }
21775
21776 RTX_FRAME_RELATED_P (insn) = 1;
21777 fp_offset = args_to_push;
21778 args_to_push = 0;
21779 }
21780 }
21781
21782 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21783 {
21784 if (IS_INTERRUPT (func_type))
21785 {
21786 /* Interrupt functions must not corrupt any registers.
21787 Creating a frame pointer, however, corrupts the IP
21788 register, so we must push it first. */
21789 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21790
21791 /* Do not set RTX_FRAME_RELATED_P on this insn.
21792 The dwarf stack unwinding code only wants to see one
21793 stack decrement per function, and this is not it. If
21794 this instruction is labeled as being part of the frame
21795 creation sequence then dwarf2out_frame_debug_expr will
21796 die when it encounters the assignment of IP to FP
21797 later on, since the use of SP here establishes SP as
21798 the CFA register and not IP.
21799
21800 Anyway this instruction is not really part of the stack
21801 frame creation although it is part of the prologue. */
21802 }
21803
21804 insn = emit_set_insn (ip_rtx,
21805 plus_constant (Pmode, stack_pointer_rtx,
21806 fp_offset));
21807 RTX_FRAME_RELATED_P (insn) = 1;
21808 }
21809
21810 if (args_to_push)
21811 {
21812 /* Push the argument registers, or reserve space for them. */
21813 if (cfun->machine->uses_anonymous_args)
21814 insn = emit_multi_reg_push
21815 ((0xf0 >> (args_to_push / 4)) & 0xf,
21816 (0xf0 >> (args_to_push / 4)) & 0xf);
21817 else
21818 insn = emit_insn
21819 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21820 GEN_INT (- args_to_push)));
21821 RTX_FRAME_RELATED_P (insn) = 1;
21822 }
21823
21824 /* If this is an interrupt service routine, the link register is
21825 going to be pushed, and we're not generating the extra push of IP
21826 (needed when the frame pointer is needed and the frame layout is APCS),
21827 then subtracting four from LR now will mean that the function return
21828 can be done with a single instruction. */
21829 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21830 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21831 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21832 && TARGET_ARM)
21833 {
21834 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21835
21836 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21837 }
21838
21839 if (live_regs_mask)
21840 {
21841 unsigned long dwarf_regs_mask = live_regs_mask;
21842
21843 saved_regs += bit_count (live_regs_mask) * 4;
21844 if (optimize_size && !frame_pointer_needed
21845 && saved_regs == offsets->saved_regs - offsets->saved_args)
21846 {
21847 /* If no coprocessor registers are being pushed and we don't have
21848 to worry about a frame pointer then push extra registers to
21849 create the stack frame. This is done in a way that does not
21850 alter the frame layout, so is independent of the epilogue. */
21851 int n;
21852 int frame;
21853 n = 0;
21854 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21855 n++;
21856 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21857 if (frame && n * 4 >= frame)
21858 {
21859 n = frame / 4;
21860 live_regs_mask |= (1 << n) - 1;
21861 saved_regs += frame;
21862 }
21863 }
21864
21865 if (TARGET_LDRD
21866 && current_tune->prefer_ldrd_strd
21867 && !optimize_function_for_size_p (cfun))
21868 {
21869 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21870 if (TARGET_THUMB2)
21871 thumb2_emit_strd_push (live_regs_mask);
21872 else if (TARGET_ARM
21873 && !TARGET_APCS_FRAME
21874 && !IS_INTERRUPT (func_type))
21875 arm_emit_strd_push (live_regs_mask);
21876 else
21877 {
21878 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21879 RTX_FRAME_RELATED_P (insn) = 1;
21880 }
21881 }
21882 else
21883 {
21884 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21885 RTX_FRAME_RELATED_P (insn) = 1;
21886 }
21887 }
21888
21889 if (! IS_VOLATILE (func_type))
21890 saved_regs += arm_save_coproc_regs ();
21891
21892 if (frame_pointer_needed && TARGET_ARM)
21893 {
21894 /* Create the new frame pointer. */
21895 if (TARGET_APCS_FRAME)
21896 {
21897 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21898 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21899 RTX_FRAME_RELATED_P (insn) = 1;
21900 }
21901 else
21902 {
21903 insn = GEN_INT (saved_regs - (4 + fp_offset));
21904 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21905 stack_pointer_rtx, insn));
21906 RTX_FRAME_RELATED_P (insn) = 1;
21907 }
21908 }
21909
21910 size = offsets->outgoing_args - offsets->saved_args;
21911 if (flag_stack_usage_info)
21912 current_function_static_stack_size = size;
21913
21914 /* If this isn't an interrupt service routine and we have a frame, then do
21915 stack checking. We use IP as the first scratch register, except for the
21916 non-APCS nested functions when LR or r3 is available (see clobber_ip). */
21917 if (!IS_INTERRUPT (func_type)
21918 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21919 || flag_stack_clash_protection))
21920 {
21921 unsigned int regno;
21922
21923 if (!IS_NESTED (func_type) || clobber_ip)
21924 regno = IP_REGNUM;
21925 else if (df_regs_ever_live_p (LR_REGNUM))
21926 regno = LR_REGNUM;
21927 else
21928 regno = 3;
21929
21930 if (crtl->is_leaf && !cfun->calls_alloca)
21931 {
21932 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21933 arm_emit_probe_stack_range (get_stack_check_protect (),
21934 size - get_stack_check_protect (),
21935 regno, live_regs_mask);
21936 }
21937 else if (size > 0)
21938 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21939 regno, live_regs_mask);
21940 }
21941
21942 /* Recover the static chain register. */
21943 if (clobber_ip)
21944 {
21945 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21946 insn = gen_rtx_REG (SImode, 3);
21947 else
21948 {
21949 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21950 insn = gen_frame_mem (SImode, insn);
21951 }
21952 emit_set_insn (ip_rtx, insn);
21953 emit_insn (gen_force_register_use (ip_rtx));
21954 }
21955
21956 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21957 {
21958 /* This add can produce multiple insns for a large constant, so we
21959 need to get tricky. */
21960 rtx_insn *last = get_last_insn ();
21961
21962 amount = GEN_INT (offsets->saved_args + saved_regs
21963 - offsets->outgoing_args);
21964
21965 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21966 amount));
21967 do
21968 {
21969 last = last ? NEXT_INSN (last) : get_insns ();
21970 RTX_FRAME_RELATED_P (last) = 1;
21971 }
21972 while (last != insn);
21973
21974 /* If the frame pointer is needed, emit a special barrier that
21975 will prevent the scheduler from moving stores to the frame
21976 before the stack adjustment. */
21977 if (frame_pointer_needed)
21978 emit_insn (gen_stack_tie (stack_pointer_rtx,
21979 hard_frame_pointer_rtx));
21980 }
21981
21982
21983 if (frame_pointer_needed && TARGET_THUMB2)
21984 thumb_set_frame_pointer (offsets);
21985
21986 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21987 {
21988 unsigned long mask;
21989
21990 mask = live_regs_mask;
21991 mask &= THUMB2_WORK_REGS;
21992 if (!IS_NESTED (func_type))
21993 mask |= (1 << IP_REGNUM);
21994 arm_load_pic_register (mask);
21995 }
21996
21997 /* If we are profiling, make sure no instructions are scheduled before
21998 the call to mcount. Similarly if the user has requested no
21999 scheduling in the prolog. Similarly if we want non-call exceptions
22000 using the EABI unwinder, to prevent faulting instructions from being
22001 swapped with a stack adjustment. */
22002 if (crtl->profile || !TARGET_SCHED_PROLOG
22003 || (arm_except_unwind_info (&global_options) == UI_TARGET
22004 && cfun->can_throw_non_call_exceptions))
22005 emit_insn (gen_blockage ());
22006
22007 /* If the link register is being kept alive, with the return address in it,
22008 then make sure that it does not get reused by the ce2 pass. */
22009 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22010 cfun->machine->lr_save_eliminated = 1;
22011 }
22012 \f
22013 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22014 static void
22015 arm_print_condition (FILE *stream)
22016 {
22017 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22018 {
22019 /* Branch conversion is not implemented for Thumb-2. */
22020 if (TARGET_THUMB)
22021 {
22022 output_operand_lossage ("predicated Thumb instruction");
22023 return;
22024 }
22025 if (current_insn_predicate != NULL)
22026 {
22027 output_operand_lossage
22028 ("predicated instruction in conditional sequence");
22029 return;
22030 }
22031
22032 fputs (arm_condition_codes[arm_current_cc], stream);
22033 }
22034 else if (current_insn_predicate)
22035 {
22036 enum arm_cond_code code;
22037
22038 if (TARGET_THUMB1)
22039 {
22040 output_operand_lossage ("predicated Thumb instruction");
22041 return;
22042 }
22043
22044 code = get_arm_condition_code (current_insn_predicate);
22045 fputs (arm_condition_codes[code], stream);
22046 }
22047 }
22048
22049
22050 /* Globally reserved letters: acln
22051 Punctuation letters currently used: @_|?().!#
22052 Lower case letters currently used: bcdefhimpqtvwxyz
22053 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22054 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22055
22056 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22057
22058 If CODE is 'd', then X is a condition operand and the instruction
22059 should only be executed if the condition is true.
22060 If CODE is 'D', then X is a condition operand and the instruction
22061 should only be executed if the condition is false: however, if the mode
22062 of the comparison is CCFPEmode, then always execute the instruction -- we
22063 do this because in these circumstances !GE does not necessarily imply LT;
22064 in these cases the instruction pattern will take care to make sure that
22065 an instruction containing %d will follow, thereby undoing the effects of
22066 doing this instruction unconditionally.
22067 If CODE is 'N' then X is a floating point operand that must be negated
22068 before output.
22069 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22070 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
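/* For instance, %B applied to (const_int 5) prints "-6" (the bitwise
   inverse, sign-extended), and %M applied to a DImode value held in r0
   prints "{r0-r1}".  */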
22071 static void
22072 arm_print_operand (FILE *stream, rtx x, int code)
22073 {
22074 switch (code)
22075 {
22076 case '@':
22077 fputs (ASM_COMMENT_START, stream);
22078 return;
22079
22080 case '_':
22081 fputs (user_label_prefix, stream);
22082 return;
22083
22084 case '|':
22085 fputs (REGISTER_PREFIX, stream);
22086 return;
22087
22088 case '?':
22089 arm_print_condition (stream);
22090 return;
22091
22092 case '.':
22093 /* The current condition code for a condition code setting instruction.
22094 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22095 fputc ('s', stream);
22096 arm_print_condition (stream);
22097 return;
22098
22099 case '!':
22100 /* If the instruction is conditionally executed then print
22101 the current condition code, otherwise print 's'. */
22102 gcc_assert (TARGET_THUMB2);
22103 if (current_insn_predicate)
22104 arm_print_condition (stream);
22105 else
22106 fputc ('s', stream);
22107 break;
22108
22109 /* %# is a "break" sequence. It doesn't output anything, but is used to
22110 separate e.g. operand numbers from following text, if that text consists
22111 of further digits which we don't want to be part of the operand
22112 number. */
22113 case '#':
22114 return;
22115
22116 case 'N':
22117 {
22118 REAL_VALUE_TYPE r;
22119 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22120 fprintf (stream, "%s", fp_const_from_val (&r));
22121 }
22122 return;
22123
22124 /* An integer or symbol address without a preceding # sign. */
22125 case 'c':
22126 switch (GET_CODE (x))
22127 {
22128 case CONST_INT:
22129 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22130 break;
22131
22132 case SYMBOL_REF:
22133 output_addr_const (stream, x);
22134 break;
22135
22136 case CONST:
22137 if (GET_CODE (XEXP (x, 0)) == PLUS
22138 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22139 {
22140 output_addr_const (stream, x);
22141 break;
22142 }
22143 /* Fall through. */
22144
22145 default:
22146 output_operand_lossage ("Unsupported operand for code '%c'", code);
22147 }
22148 return;
22149
22150 /* An integer that we want to print in HEX. */
22151 case 'x':
22152 switch (GET_CODE (x))
22153 {
22154 case CONST_INT:
22155 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22156 break;
22157
22158 default:
22159 output_operand_lossage ("Unsupported operand for code '%c'", code);
22160 }
22161 return;
22162
22163 case 'B':
22164 if (CONST_INT_P (x))
22165 {
22166 HOST_WIDE_INT val;
22167 val = ARM_SIGN_EXTEND (~INTVAL (x));
22168 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22169 }
22170 else
22171 {
22172 putc ('~', stream);
22173 output_addr_const (stream, x);
22174 }
22175 return;
22176
22177 case 'b':
22178 /* Print the log2 of a CONST_INT. */
22179 {
22180 HOST_WIDE_INT val;
22181
22182 if (!CONST_INT_P (x)
22183 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22184 output_operand_lossage ("Unsupported operand for code '%c'", code);
22185 else
22186 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22187 }
22188 return;
22189
22190 case 'L':
22191 /* The low 16 bits of an immediate constant. */
22192 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
22193 return;
22194
22195 case 'i':
22196 fprintf (stream, "%s", arithmetic_instr (x, 1));
22197 return;
22198
22199 case 'I':
22200 fprintf (stream, "%s", arithmetic_instr (x, 0));
22201 return;
22202
22203 case 'S':
22204 {
22205 HOST_WIDE_INT val;
22206 const char *shift;
22207
22208 shift = shift_op (x, &val);
22209
22210 if (shift)
22211 {
22212 fprintf (stream, ", %s ", shift);
22213 if (val == -1)
22214 arm_print_operand (stream, XEXP (x, 1), 0);
22215 else
22216 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22217 }
22218 }
22219 return;
22220
22221 /* An explanation of the 'Q', 'R' and 'H' register operands:
22222
22223 In a pair of registers containing a DI or DF value the 'Q'
22224 operand returns the register number of the register containing
22225 the least significant part of the value. The 'R' operand returns
22226 the register number of the register containing the most
22227 significant part of the value.
22228
22229 The 'H' operand returns the higher of the two register numbers.
22230 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22231 same as the 'Q' operand, since the most significant part of the
22232 value is held in the lower number register. The reverse is true
22233 on systems where WORDS_BIG_ENDIAN is false.
22234
22235 The purpose of these operands is to distinguish between cases
22236 where the endian-ness of the values is important (for example
22237 when they are added together), and cases where the endian-ness
22238 is irrelevant, but the order of register operations is important.
22239 For example when loading a value from memory into a register
22240 pair, the endian-ness does not matter. Provided that the value
22241 from the lower memory address is put into the lower numbered
22242 register, and the value from the higher address is put into the
22243 higher numbered register, the load will work regardless of whether
22244 the value being loaded is big-wordian or little-wordian. The
22245 order of the two register loads can matter however, if the address
22246 of the memory location is actually held in one of the registers
22247 being overwritten by the load.
22248
22249 The 'Q' and 'R' constraints are also available for 64-bit
22250 constants. */
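/* As an illustration, a 64-bit addition can be written in a pattern as
   "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2" so that the carry always
   flows from the low word to the high word, whatever the word order
   (an illustrative template, not a quotation from arm.md).  */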
22251 case 'Q':
22252 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22253 {
22254 rtx part = gen_lowpart (SImode, x);
22255 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22256 return;
22257 }
22258
22259 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22260 {
22261 output_operand_lossage ("invalid operand for code '%c'", code);
22262 return;
22263 }
22264
22265 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22266 return;
22267
22268 case 'R':
22269 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22270 {
22271 machine_mode mode = GET_MODE (x);
22272 rtx part;
22273
22274 if (mode == VOIDmode)
22275 mode = DImode;
22276 part = gen_highpart_mode (SImode, mode, x);
22277 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22278 return;
22279 }
22280
22281 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22282 {
22283 output_operand_lossage ("invalid operand for code '%c'", code);
22284 return;
22285 }
22286
22287 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22288 return;
22289
22290 case 'H':
22291 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22292 {
22293 output_operand_lossage ("invalid operand for code '%c'", code);
22294 return;
22295 }
22296
22297 asm_fprintf (stream, "%r", REGNO (x) + 1);
22298 return;
22299
22300 case 'J':
22301 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22302 {
22303 output_operand_lossage ("invalid operand for code '%c'", code);
22304 return;
22305 }
22306
22307 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22308 return;
22309
22310 case 'K':
22311 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22312 {
22313 output_operand_lossage ("invalid operand for code '%c'", code);
22314 return;
22315 }
22316
22317 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22318 return;
22319
22320 case 'm':
22321 asm_fprintf (stream, "%r",
22322 REG_P (XEXP (x, 0))
22323 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22324 return;
22325
22326 case 'M':
22327 asm_fprintf (stream, "{%r-%r}",
22328 REGNO (x),
22329 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22330 return;
22331
22332 /* Like 'M', but writing doubleword vector registers, for use by Neon
22333 insns. */
22334 case 'h':
22335 {
22336 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22337 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22338 if (numregs == 1)
22339 asm_fprintf (stream, "{d%d}", regno);
22340 else
22341 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22342 }
22343 return;
22344
22345 case 'd':
22346 /* CONST_TRUE_RTX means always -- that's the default. */
22347 if (x == const_true_rtx)
22348 return;
22349
22350 if (!COMPARISON_P (x))
22351 {
22352 output_operand_lossage ("invalid operand for code '%c'", code);
22353 return;
22354 }
22355
22356 fputs (arm_condition_codes[get_arm_condition_code (x)],
22357 stream);
22358 return;
22359
22360 case 'D':
22361 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22362 want to do that. */
22363 if (x == const_true_rtx)
22364 {
22365 output_operand_lossage ("instruction never executed");
22366 return;
22367 }
22368 if (!COMPARISON_P (x))
22369 {
22370 output_operand_lossage ("invalid operand for code '%c'", code);
22371 return;
22372 }
22373
22374 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22375 (get_arm_condition_code (x))],
22376 stream);
22377 return;
22378
22379 case 's':
22380 case 'V':
22381 case 'W':
22382 case 'X':
22383 case 'Y':
22384 case 'Z':
22385 /* Former Maverick support, removed after GCC-4.7. */
22386 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22387 return;
22388
22389 case 'U':
22390 if (!REG_P (x)
22391 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22392 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22393 /* Bad value for wCG register number. */
22394 {
22395 output_operand_lossage ("invalid operand for code '%c'", code);
22396 return;
22397 }
22398
22399 else
22400 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22401 return;
22402
22403 /* Print an iWMMXt control register name. */
22404 case 'w':
22405 if (!CONST_INT_P (x)
22406 || INTVAL (x) < 0
22407 || INTVAL (x) >= 16)
22408 /* Bad value for wC register number. */
22409 {
22410 output_operand_lossage ("invalid operand for code '%c'", code);
22411 return;
22412 }
22413
22414 else
22415 {
22416 static const char * wc_reg_names [16] =
22417 {
22418 "wCID", "wCon", "wCSSF", "wCASF",
22419 "wC4", "wC5", "wC6", "wC7",
22420 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22421 "wC12", "wC13", "wC14", "wC15"
22422 };
22423
22424 fputs (wc_reg_names [INTVAL (x)], stream);
22425 }
22426 return;
22427
22428 /* Print the high single-precision register of a VFP double-precision
22429 register. */
22430 case 'p':
22431 {
22432 machine_mode mode = GET_MODE (x);
22433 int regno;
22434
22435 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22436 {
22437 output_operand_lossage ("invalid operand for code '%c'", code);
22438 return;
22439 }
22440
22441 regno = REGNO (x);
22442 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22443 {
22444 output_operand_lossage ("invalid operand for code '%c'", code);
22445 return;
22446 }
22447
22448 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22449 }
22450 return;
22451
22452 /* Print a VFP/Neon double precision or quad precision register name. */
22453 case 'P':
22454 case 'q':
22455 {
22456 machine_mode mode = GET_MODE (x);
22457 int is_quad = (code == 'q');
22458 int regno;
22459
22460 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22461 {
22462 output_operand_lossage ("invalid operand for code '%c'", code);
22463 return;
22464 }
22465
22466 if (!REG_P (x)
22467 || !IS_VFP_REGNUM (REGNO (x)))
22468 {
22469 output_operand_lossage ("invalid operand for code '%c'", code);
22470 return;
22471 }
22472
22473 regno = REGNO (x);
22474 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22475 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22476 {
22477 output_operand_lossage ("invalid operand for code '%c'", code);
22478 return;
22479 }
22480
22481 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22482 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22483 }
22484 return;
22485
22486 /* These two codes print the low/high doubleword register of a Neon quad
22487 register, respectively. For pair-structure types, can also print
22488 low/high quadword registers. */
22489 case 'e':
22490 case 'f':
22491 {
22492 machine_mode mode = GET_MODE (x);
22493 int regno;
22494
22495 if ((GET_MODE_SIZE (mode) != 16
22496 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22497 {
22498 output_operand_lossage ("invalid operand for code '%c'", code);
22499 return;
22500 }
22501
22502 regno = REGNO (x);
22503 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22504 {
22505 output_operand_lossage ("invalid operand for code '%c'", code);
22506 return;
22507 }
22508
22509 if (GET_MODE_SIZE (mode) == 16)
22510 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22511 + (code == 'f' ? 1 : 0));
22512 else
22513 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22514 + (code == 'f' ? 1 : 0));
22515 }
22516 return;
22517
22518 /* Print a VFPv3 floating-point constant, represented as an integer
22519 index. */
22520 case 'G':
22521 {
22522 int index = vfp3_const_double_index (x);
22523 gcc_assert (index != -1);
22524 fprintf (stream, "%d", index);
22525 }
22526 return;
22527
22528 /* Print bits representing opcode features for Neon.
22529
22530 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22531 and polynomials as unsigned.
22532
22533 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22534
22535 Bit 2 is 1 for rounding functions, 0 otherwise. */
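/* A worked example of the encoding above: bits == 3 (signed, float)
   makes %T, %t and %F all print 'f'; bits == 0 prints 'u' for %T and
   'i' for %F; additionally setting bit 2 makes %O print "r", as used
   for the rounding variants of an instruction (values illustrative).  */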
22536
22537 /* Identify the type as 's', 'u', 'p' or 'f'. */
22538 case 'T':
22539 {
22540 HOST_WIDE_INT bits = INTVAL (x);
22541 fputc ("uspf"[bits & 3], stream);
22542 }
22543 return;
22544
22545 /* Likewise, but signed and unsigned integers are both 'i'. */
22546 case 'F':
22547 {
22548 HOST_WIDE_INT bits = INTVAL (x);
22549 fputc ("iipf"[bits & 3], stream);
22550 }
22551 return;
22552
22553 /* As for 'T', but emit 'u' instead of 'p'. */
22554 case 't':
22555 {
22556 HOST_WIDE_INT bits = INTVAL (x);
22557 fputc ("usuf"[bits & 3], stream);
22558 }
22559 return;
22560
22561 /* Bit 2: rounding (vs none). */
22562 case 'O':
22563 {
22564 HOST_WIDE_INT bits = INTVAL (x);
22565 fputs ((bits & 4) != 0 ? "r" : "", stream);
22566 }
22567 return;
22568
22569 /* Memory operand for vld1/vst1 instruction. */
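/* For example, a 16-byte access whose address in r0 is known to be
   16-byte aligned is printed as "[r0:128]", with "!" or ", rM"
   appended for the post-increment forms (register names illustrative).  */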
22570 case 'A':
22571 {
22572 rtx addr;
22573 bool postinc = FALSE;
22574 rtx postinc_reg = NULL;
22575 unsigned align, memsize, align_bits;
22576
22577 gcc_assert (MEM_P (x));
22578 addr = XEXP (x, 0);
22579 if (GET_CODE (addr) == POST_INC)
22580 {
22581 postinc = 1;
22582 addr = XEXP (addr, 0);
22583 }
22584 if (GET_CODE (addr) == POST_MODIFY)
22585 {
22586 postinc_reg = XEXP (XEXP (addr, 1), 1);
22587 addr = XEXP (addr, 0);
22588 }
22589 asm_fprintf (stream, "[%r", REGNO (addr));
22590
22591 /* We know the alignment of this access, so we can emit a hint in the
22592 instruction (for some alignments) as an aid to the memory subsystem
22593 of the target. */
22594 align = MEM_ALIGN (x) >> 3;
22595 memsize = MEM_SIZE (x);
22596
22597 /* Only certain alignment specifiers are supported by the hardware. */
22598 if (memsize == 32 && (align % 32) == 0)
22599 align_bits = 256;
22600 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22601 align_bits = 128;
22602 else if (memsize >= 8 && (align % 8) == 0)
22603 align_bits = 64;
22604 else
22605 align_bits = 0;
22606
22607 if (align_bits != 0)
22608 asm_fprintf (stream, ":%d", align_bits);
22609
22610 asm_fprintf (stream, "]");
22611
22612 if (postinc)
22613 fputs ("!", stream);
22614 if (postinc_reg)
22615 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22616 }
22617 return;
22618
22619 case 'C':
22620 {
22621 rtx addr;
22622
22623 gcc_assert (MEM_P (x));
22624 addr = XEXP (x, 0);
22625 gcc_assert (REG_P (addr));
22626 asm_fprintf (stream, "[%r]", REGNO (addr));
22627 }
22628 return;
22629
22630 /* Translate an S register number into a D register number and element index. */
22631 case 'y':
22632 {
22633 machine_mode mode = GET_MODE (x);
22634 int regno;
22635
22636 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22637 {
22638 output_operand_lossage ("invalid operand for code '%c'", code);
22639 return;
22640 }
22641
22642 regno = REGNO (x);
22643 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22644 {
22645 output_operand_lossage ("invalid operand for code '%c'", code);
22646 return;
22647 }
22648
22649 regno = regno - FIRST_VFP_REGNUM;
22650 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22651 }
22652 return;
22653
22654 case 'v':
22655 gcc_assert (CONST_DOUBLE_P (x));
22656 int result;
22657 result = vfp3_const_double_for_fract_bits (x);
22658 if (result == 0)
22659 result = vfp3_const_double_for_bits (x);
22660 fprintf (stream, "#%d", result);
22661 return;
22662
22663 /* Register specifier for vld1.16/vst1.16. Translate the S register
22664 number into a D register number and element index. */
22665 case 'z':
22666 {
22667 machine_mode mode = GET_MODE (x);
22668 int regno;
22669
22670 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22671 {
22672 output_operand_lossage ("invalid operand for code '%c'", code);
22673 return;
22674 }
22675
22676 regno = REGNO (x);
22677 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22678 {
22679 output_operand_lossage ("invalid operand for code '%c'", code);
22680 return;
22681 }
22682
22683 regno = regno - FIRST_VFP_REGNUM;
22684 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22685 }
22686 return;
22687
22688 default:
22689 if (x == 0)
22690 {
22691 output_operand_lossage ("missing operand");
22692 return;
22693 }
22694
22695 switch (GET_CODE (x))
22696 {
22697 case REG:
22698 asm_fprintf (stream, "%r", REGNO (x));
22699 break;
22700
22701 case MEM:
22702 output_address (GET_MODE (x), XEXP (x, 0));
22703 break;
22704
22705 case CONST_DOUBLE:
22706 {
22707 char fpstr[20];
22708 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22709 sizeof (fpstr), 0, 1);
22710 fprintf (stream, "#%s", fpstr);
22711 }
22712 break;
22713
22714 default:
22715 gcc_assert (GET_CODE (x) != NEG);
22716 fputc ('#', stream);
22717 if (GET_CODE (x) == HIGH)
22718 {
22719 fputs (":lower16:", stream);
22720 x = XEXP (x, 0);
22721 }
22722
22723 output_addr_const (stream, x);
22724 break;
22725 }
22726 }
22727 }
22728 \f
22729 /* Target hook for printing a memory address. */
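/* For example, in 32-bit code (reg r0) prints as "[r0]",
   (plus (reg r1) (const_int 8)) as "[r1, #8]", (pre_dec (reg sp)) in
   SImode as "[sp, #-4]!" and (post_inc (reg r2)) as "[r2], #4"
   (register choices are illustrative).  */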
22730 static void
22731 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22732 {
22733 if (TARGET_32BIT)
22734 {
22735 int is_minus = GET_CODE (x) == MINUS;
22736
22737 if (REG_P (x))
22738 asm_fprintf (stream, "[%r]", REGNO (x));
22739 else if (GET_CODE (x) == PLUS || is_minus)
22740 {
22741 rtx base = XEXP (x, 0);
22742 rtx index = XEXP (x, 1);
22743 HOST_WIDE_INT offset = 0;
22744 if (!REG_P (base)
22745 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22746 {
22747 /* Ensure that BASE is a register
22748 (one of them must be).
22749 Also ensure that SP is not used as an index register. */
22750 std::swap (base, index);
22751 }
22752 switch (GET_CODE (index))
22753 {
22754 case CONST_INT:
22755 offset = INTVAL (index);
22756 if (is_minus)
22757 offset = -offset;
22758 asm_fprintf (stream, "[%r, #%wd]",
22759 REGNO (base), offset);
22760 break;
22761
22762 case REG:
22763 asm_fprintf (stream, "[%r, %s%r]",
22764 REGNO (base), is_minus ? "-" : "",
22765 REGNO (index));
22766 break;
22767
22768 case MULT:
22769 case ASHIFTRT:
22770 case LSHIFTRT:
22771 case ASHIFT:
22772 case ROTATERT:
22773 {
22774 asm_fprintf (stream, "[%r, %s%r",
22775 REGNO (base), is_minus ? "-" : "",
22776 REGNO (XEXP (index, 0)));
22777 arm_print_operand (stream, index, 'S');
22778 fputs ("]", stream);
22779 break;
22780 }
22781
22782 default:
22783 gcc_unreachable ();
22784 }
22785 }
22786 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22787 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22788 {
22789 gcc_assert (REG_P (XEXP (x, 0)));
22790
22791 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22792 asm_fprintf (stream, "[%r, #%s%d]!",
22793 REGNO (XEXP (x, 0)),
22794 GET_CODE (x) == PRE_DEC ? "-" : "",
22795 GET_MODE_SIZE (mode));
22796 else
22797 asm_fprintf (stream, "[%r], #%s%d",
22798 REGNO (XEXP (x, 0)),
22799 GET_CODE (x) == POST_DEC ? "-" : "",
22800 GET_MODE_SIZE (mode));
22801 }
22802 else if (GET_CODE (x) == PRE_MODIFY)
22803 {
22804 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22805 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22806 asm_fprintf (stream, "#%wd]!",
22807 INTVAL (XEXP (XEXP (x, 1), 1)));
22808 else
22809 asm_fprintf (stream, "%r]!",
22810 REGNO (XEXP (XEXP (x, 1), 1)));
22811 }
22812 else if (GET_CODE (x) == POST_MODIFY)
22813 {
22814 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22815 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22816 asm_fprintf (stream, "#%wd",
22817 INTVAL (XEXP (XEXP (x, 1), 1)));
22818 else
22819 asm_fprintf (stream, "%r",
22820 REGNO (XEXP (XEXP (x, 1), 1)));
22821 }
22822 else output_addr_const (stream, x);
22823 }
22824 else
22825 {
22826 if (REG_P (x))
22827 asm_fprintf (stream, "[%r]", REGNO (x));
22828 else if (GET_CODE (x) == POST_INC)
22829 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22830 else if (GET_CODE (x) == PLUS)
22831 {
22832 gcc_assert (REG_P (XEXP (x, 0)));
22833 if (CONST_INT_P (XEXP (x, 1)))
22834 asm_fprintf (stream, "[%r, #%wd]",
22835 REGNO (XEXP (x, 0)),
22836 INTVAL (XEXP (x, 1)));
22837 else
22838 asm_fprintf (stream, "[%r, %r]",
22839 REGNO (XEXP (x, 0)),
22840 REGNO (XEXP (x, 1)));
22841 }
22842 else
22843 output_addr_const (stream, x);
22844 }
22845 }
22846 \f
22847 /* Target hook for indicating whether a punctuation character for
22848 TARGET_PRINT_OPERAND is valid. */
22849 static bool
22850 arm_print_operand_punct_valid_p (unsigned char code)
22851 {
22852 return (code == '@' || code == '|' || code == '.'
22853 || code == '(' || code == ')' || code == '#'
22854 || (TARGET_32BIT && (code == '?'))
22855 || (TARGET_THUMB2 && (code == '!'))
22856 || (TARGET_THUMB && (code == '_')));
22857 }
22858 \f
22859 /* Target hook for assembling integer objects. The ARM version needs to
22860 handle word-sized values specially. */
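/* For example, a word-sized SYMBOL_REF in a PIC constant pool is emitted
   as "\t.word\tsym(GOTOFF)" when PIC data is text-relative and the symbol
   binds locally, and as "sym(GOT)" otherwise ("sym" is illustrative).  */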
22861 static bool
22862 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22863 {
22864 machine_mode mode;
22865
22866 if (size == UNITS_PER_WORD && aligned_p)
22867 {
22868 fputs ("\t.word\t", asm_out_file);
22869 output_addr_const (asm_out_file, x);
22870
22871 /* Mark symbols as position independent. We only do this in the
22872 .text segment, not in the .data segment. */
22873 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22874 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22875 {
22876 /* See legitimize_pic_address for an explanation of the
22877 TARGET_VXWORKS_RTP check. */
22878 /* References to weak symbols cannot be resolved locally:
22879 they may be overridden by a non-weak definition at link
22880 time. */
22881 if (!arm_pic_data_is_text_relative
22882 || (GET_CODE (x) == SYMBOL_REF
22883 && (!SYMBOL_REF_LOCAL_P (x)
22884 || (SYMBOL_REF_DECL (x)
22885 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22886 fputs ("(GOT)", asm_out_file);
22887 else
22888 fputs ("(GOTOFF)", asm_out_file);
22889 }
22890 fputc ('\n', asm_out_file);
22891 return true;
22892 }
22893
22894 mode = GET_MODE (x);
22895
22896 if (arm_vector_mode_supported_p (mode))
22897 {
22898 int i, units;
22899
22900 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22901
22902 units = CONST_VECTOR_NUNITS (x);
22903 size = GET_MODE_UNIT_SIZE (mode);
22904
22905 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22906 for (i = 0; i < units; i++)
22907 {
22908 rtx elt = CONST_VECTOR_ELT (x, i);
22909 assemble_integer
22910 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22911 }
22912 else
22913 for (i = 0; i < units; i++)
22914 {
22915 rtx elt = CONST_VECTOR_ELT (x, i);
22916 assemble_real
22917 (*CONST_DOUBLE_REAL_VALUE (elt),
22918 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22919 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22920 }
22921
22922 return true;
22923 }
22924
22925 return default_assemble_integer (x, size, aligned_p);
22926 }
22927
22928 static void
22929 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22930 {
22931 section *s;
22932
22933 if (!TARGET_AAPCS_BASED)
22934 {
22935 (is_ctor ?
22936 default_named_section_asm_out_constructor
22937 : default_named_section_asm_out_destructor) (symbol, priority);
22938 return;
22939 }
22940
22941 /* Put these in the .init_array section, using a special relocation. */
22942 if (priority != DEFAULT_INIT_PRIORITY)
22943 {
22944 char buf[18];
22945 sprintf (buf, "%s.%.5u",
22946 is_ctor ? ".init_array" : ".fini_array",
22947 priority);
22948 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22949 }
22950 else if (is_ctor)
22951 s = ctors_section;
22952 else
22953 s = dtors_section;
22954
22955 switch_to_section (s);
22956 assemble_align (POINTER_SIZE);
22957 fputs ("\t.word\t", asm_out_file);
22958 output_addr_const (asm_out_file, symbol);
22959 fputs ("(target1)\n", asm_out_file);
22960 }
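/* As a sketch of the AAPCS path above, a constructor registered with
   priority 65 switches to a section named ".init_array.00065" and emits
   "\t.word\tfoo(target1)", where "foo" is an illustrative symbol name.  */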
22961
22962 /* Add a function to the list of static constructors. */
22963
22964 static void
22965 arm_elf_asm_constructor (rtx symbol, int priority)
22966 {
22967 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22968 }
22969
22970 /* Add a function to the list of static destructors. */
22971
22972 static void
22973 arm_elf_asm_destructor (rtx symbol, int priority)
22974 {
22975 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22976 }
22977 \f
22978 /* A finite state machine takes care of noticing whether or not instructions
22979 can be conditionally executed, and thus decrease execution time and code
22980 size by deleting branch instructions. The fsm is controlled by
22981 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22982
22983 /* The states of the fsm controlling condition codes are:
22984 0: normal, do nothing special
22985 1: make ASM_OUTPUT_OPCODE not output this instruction
22986 2: make ASM_OUTPUT_OPCODE not output this instruction
22987 3: make instructions conditional
22988 4: make instructions conditional
22989
22990 State transitions (state->state by whom under condition):
22991 0 -> 1 final_prescan_insn if the `target' is a label
22992 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22993 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22994 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22995 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22996 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22997 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22998 (the target insn is arm_target_insn).
22999
23000 If the jump clobbers the conditions then we use states 2 and 4.
23001
23002 A similar thing can be done with conditional return insns.
23003
23004 XXX In case the `target' is an unconditional branch, this conditionalising
23005 of the instructions always reduces code size, but not always execution
23006 time. But then, I want to reduce the code size to somewhere near what
23007 /bin/cc produces. */
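/* As an illustration, in ARM state the sequence

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
    .L1:

   can be collapsed by this machinery into

	cmp	r0, #0
	addne	r1, r1, #1

   (registers, the constant and the label are only illustrative).  */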
23008
23009 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23010 instructions. When a COND_EXEC instruction is seen the subsequent
23011 instructions are scanned so that multiple conditional instructions can be
23012 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23013 specify the length and true/false mask for the IT block. These will be
23014 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
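/* For example, two conditional insns under EQ followed by one under NE
   can be emitted under a single "itte eq" prefix rather than three
   separate IT instructions (an illustrative Thumb-2 sequence).  */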
23015
23016 /* Returns the index of the ARM condition code string in
23017 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23018 COMPARISON should be an rtx like `(eq (...) (...))'. */
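/* For instance, (eq (reg:CC_Z CC_REGNUM) (const_int 0)) maps to ARM_EQ,
   while in CC_Cmode LTU maps to ARM_CS and GEU to ARM_CC (the operands
   shown are only schematic).  */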
23019
23020 enum arm_cond_code
23021 maybe_get_arm_condition_code (rtx comparison)
23022 {
23023 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23024 enum arm_cond_code code;
23025 enum rtx_code comp_code = GET_CODE (comparison);
23026
23027 if (GET_MODE_CLASS (mode) != MODE_CC)
23028 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23029 XEXP (comparison, 1));
23030
23031 switch (mode)
23032 {
23033 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23034 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23035 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23036 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23037 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23038 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23039 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23040 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23041 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23042 case E_CC_DLTUmode: code = ARM_CC;
23043
23044 dominance:
23045 if (comp_code == EQ)
23046 return ARM_INVERSE_CONDITION_CODE (code);
23047 if (comp_code == NE)
23048 return code;
23049 return ARM_NV;
23050
23051 case E_CC_NOOVmode:
23052 switch (comp_code)
23053 {
23054 case NE: return ARM_NE;
23055 case EQ: return ARM_EQ;
23056 case GE: return ARM_PL;
23057 case LT: return ARM_MI;
23058 default: return ARM_NV;
23059 }
23060
23061 case E_CC_Zmode:
23062 switch (comp_code)
23063 {
23064 case NE: return ARM_NE;
23065 case EQ: return ARM_EQ;
23066 default: return ARM_NV;
23067 }
23068
23069 case E_CC_Nmode:
23070 switch (comp_code)
23071 {
23072 case NE: return ARM_MI;
23073 case EQ: return ARM_PL;
23074 default: return ARM_NV;
23075 }
23076
23077 case E_CCFPEmode:
23078 case E_CCFPmode:
23079 /* We can handle all cases except UNEQ and LTGT. */
23080 switch (comp_code)
23081 {
23082 case GE: return ARM_GE;
23083 case GT: return ARM_GT;
23084 case LE: return ARM_LS;
23085 case LT: return ARM_MI;
23086 case NE: return ARM_NE;
23087 case EQ: return ARM_EQ;
23088 case ORDERED: return ARM_VC;
23089 case UNORDERED: return ARM_VS;
23090 case UNLT: return ARM_LT;
23091 case UNLE: return ARM_LE;
23092 case UNGT: return ARM_HI;
23093 case UNGE: return ARM_PL;
23094 /* UNEQ and LTGT do not have a representation. */
23095 case UNEQ: /* Fall through. */
23096 case LTGT: /* Fall through. */
23097 default: return ARM_NV;
23098 }
23099
23100 case E_CC_SWPmode:
23101 switch (comp_code)
23102 {
23103 case NE: return ARM_NE;
23104 case EQ: return ARM_EQ;
23105 case GE: return ARM_LE;
23106 case GT: return ARM_LT;
23107 case LE: return ARM_GE;
23108 case LT: return ARM_GT;
23109 case GEU: return ARM_LS;
23110 case GTU: return ARM_CC;
23111 case LEU: return ARM_CS;
23112 case LTU: return ARM_HI;
23113 default: return ARM_NV;
23114 }
23115
23116 case E_CC_Cmode:
23117 switch (comp_code)
23118 {
23119 case LTU: return ARM_CS;
23120 case GEU: return ARM_CC;
23121 case NE: return ARM_CS;
23122 case EQ: return ARM_CC;
23123 default: return ARM_NV;
23124 }
23125
23126 case E_CC_CZmode:
23127 switch (comp_code)
23128 {
23129 case NE: return ARM_NE;
23130 case EQ: return ARM_EQ;
23131 case GEU: return ARM_CS;
23132 case GTU: return ARM_HI;
23133 case LEU: return ARM_LS;
23134 case LTU: return ARM_CC;
23135 default: return ARM_NV;
23136 }
23137
23138 case E_CC_NCVmode:
23139 switch (comp_code)
23140 {
23141 case GE: return ARM_GE;
23142 case LT: return ARM_LT;
23143 case GEU: return ARM_CS;
23144 case LTU: return ARM_CC;
23145 default: return ARM_NV;
23146 }
23147
23148 case E_CC_Vmode:
23149 switch (comp_code)
23150 {
23151 case NE: return ARM_VS;
23152 case EQ: return ARM_VC;
23153 default: return ARM_NV;
23154 }
23155
23156 case E_CCmode:
23157 switch (comp_code)
23158 {
23159 case NE: return ARM_NE;
23160 case EQ: return ARM_EQ;
23161 case GE: return ARM_GE;
23162 case GT: return ARM_GT;
23163 case LE: return ARM_LE;
23164 case LT: return ARM_LT;
23165 case GEU: return ARM_CS;
23166 case GTU: return ARM_HI;
23167 case LEU: return ARM_LS;
23168 case LTU: return ARM_CC;
23169 default: return ARM_NV;
23170 }
23171
23172 default: gcc_unreachable ();
23173 }
23174 }
23175
23176 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23177 static enum arm_cond_code
23178 get_arm_condition_code (rtx comparison)
23179 {
23180 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23181 gcc_assert (code != ARM_NV);
23182 return code;
23183 }
23184
23185 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23186 code registers when not targeting Thumb1. The VFP condition register
23187 only exists when generating hard-float code. */
23188 static bool
23189 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23190 {
23191 if (!TARGET_32BIT)
23192 return false;
23193
23194 *p1 = CC_REGNUM;
23195 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23196 return true;
23197 }
23198
23199 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23200 instructions. */
23201 void
23202 thumb2_final_prescan_insn (rtx_insn *insn)
23203 {
23204 rtx_insn *first_insn = insn;
23205 rtx body = PATTERN (insn);
23206 rtx predicate;
23207 enum arm_cond_code code;
23208 int n;
23209 int mask;
23210 int max;
23211
23212 /* max_insns_skipped in the tune was already taken into account in the
23213 cost model of the ifcvt pass when generating COND_EXEC insns. At this
23214 stage just emit the IT blocks as they are found; it does not make sense
23215 to split the IT blocks. */
23216 max = MAX_INSN_PER_IT_BLOCK;
23217
23218 /* Remove the previous insn from the count of insns to be output. */
23219 if (arm_condexec_count)
23220 arm_condexec_count--;
23221
23222 /* Nothing to do if we are already inside a conditional block. */
23223 if (arm_condexec_count)
23224 return;
23225
23226 if (GET_CODE (body) != COND_EXEC)
23227 return;
23228
23229 /* Conditional jumps are implemented directly. */
23230 if (JUMP_P (insn))
23231 return;
23232
23233 predicate = COND_EXEC_TEST (body);
23234 arm_current_cc = get_arm_condition_code (predicate);
23235
23236 n = get_attr_ce_count (insn);
23237 arm_condexec_count = 1;
23238 arm_condexec_mask = (1 << n) - 1;
23239 arm_condexec_masklen = n;
23240 /* See if subsequent instructions can be combined into the same block. */
23241 for (;;)
23242 {
23243 insn = next_nonnote_insn (insn);
23244
23245 /* Jumping into the middle of an IT block is illegal, so a label or
23246 barrier terminates the block. */
23247 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23248 break;
23249
23250 body = PATTERN (insn);
23251 /* USE and CLOBBER aren't really insns, so just skip them. */
23252 if (GET_CODE (body) == USE
23253 || GET_CODE (body) == CLOBBER)
23254 continue;
23255
23256 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23257 if (GET_CODE (body) != COND_EXEC)
23258 break;
23259 /* Maximum number of conditionally executed instructions in a block. */
23260 n = get_attr_ce_count (insn);
23261 if (arm_condexec_masklen + n > max)
23262 break;
23263
23264 predicate = COND_EXEC_TEST (body);
23265 code = get_arm_condition_code (predicate);
23266 mask = (1 << n) - 1;
23267 if (arm_current_cc == code)
23268 arm_condexec_mask |= (mask << arm_condexec_masklen);
23269 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23270 break;
23271
23272 arm_condexec_count++;
23273 arm_condexec_masklen += n;
23274
23275 /* A jump must be the last instruction in a conditional block. */
23276 if (JUMP_P (insn))
23277 break;
23278 }
23279 /* Restore recog_data (getting the attributes of other insns can
23280 destroy this array, but final.c assumes that it remains intact
23281 across this call). */
23282 extract_constrain_insn_cached (first_insn);
23283 }
23284
23285 void
23286 arm_final_prescan_insn (rtx_insn *insn)
23287 {
23288 /* BODY will hold the body of INSN. */
23289 rtx body = PATTERN (insn);
23290
23291 /* This will be 1 if trying to repeat the trick, and things need to be
23292 reversed if it appears to fail. */
23293 int reverse = 0;
23294
23295 /* If we start with a return insn, we only succeed if we find another one. */
23296 int seeking_return = 0;
23297 enum rtx_code return_code = UNKNOWN;
23298
23299 /* START_INSN will hold the insn from where we start looking. This is the
23300 first insn after the following code_label if REVERSE is true. */
23301 rtx_insn *start_insn = insn;
23302
23303 /* If in state 4, check if the target branch is reached, in order to
23304 change back to state 0. */
23305 if (arm_ccfsm_state == 4)
23306 {
23307 if (insn == arm_target_insn)
23308 {
23309 arm_target_insn = NULL;
23310 arm_ccfsm_state = 0;
23311 }
23312 return;
23313 }
23314
23315 /* If in state 3, it is possible to repeat the trick, if this insn is an
23316 unconditional branch to a label, and immediately following this branch
23317 is the previous target label which is only used once, and the label this
23318 branch jumps to is not too far off. */
23319 if (arm_ccfsm_state == 3)
23320 {
23321 if (simplejump_p (insn))
23322 {
23323 start_insn = next_nonnote_insn (start_insn);
23324 if (BARRIER_P (start_insn))
23325 {
23326 /* XXX Isn't this always a barrier? */
23327 start_insn = next_nonnote_insn (start_insn);
23328 }
23329 if (LABEL_P (start_insn)
23330 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23331 && LABEL_NUSES (start_insn) == 1)
23332 reverse = TRUE;
23333 else
23334 return;
23335 }
23336 else if (ANY_RETURN_P (body))
23337 {
23338 start_insn = next_nonnote_insn (start_insn);
23339 if (BARRIER_P (start_insn))
23340 start_insn = next_nonnote_insn (start_insn);
23341 if (LABEL_P (start_insn)
23342 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23343 && LABEL_NUSES (start_insn) == 1)
23344 {
23345 reverse = TRUE;
23346 seeking_return = 1;
23347 return_code = GET_CODE (body);
23348 }
23349 else
23350 return;
23351 }
23352 else
23353 return;
23354 }
23355
23356 gcc_assert (!arm_ccfsm_state || reverse);
23357 if (!JUMP_P (insn))
23358 return;
23359
23360 /* This jump might be paralleled with a clobber of the condition codes;
23361 the jump should always come first. */
23362 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23363 body = XVECEXP (body, 0, 0);
23364
23365 if (reverse
23366 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23367 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23368 {
23369 int insns_skipped;
23370 int fail = FALSE, succeed = FALSE;
23371 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23372 int then_not_else = TRUE;
23373 rtx_insn *this_insn = start_insn;
23374 rtx label = 0;
23375
23376 /* Register the insn jumped to. */
23377 if (reverse)
23378 {
23379 if (!seeking_return)
23380 label = XEXP (SET_SRC (body), 0);
23381 }
23382 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23383 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23384 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23385 {
23386 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23387 then_not_else = FALSE;
23388 }
23389 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23390 {
23391 seeking_return = 1;
23392 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23393 }
23394 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23395 {
23396 seeking_return = 1;
23397 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23398 then_not_else = FALSE;
23399 }
23400 else
23401 gcc_unreachable ();
23402
23403 /* See how many insns this branch skips, and what kind of insns. If all
23404 insns are okay, and the label or unconditional branch to the same
23405 label is not too far away, succeed. */
23406 for (insns_skipped = 0;
23407 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23408 {
23409 rtx scanbody;
23410
23411 this_insn = next_nonnote_insn (this_insn);
23412 if (!this_insn)
23413 break;
23414
23415 switch (GET_CODE (this_insn))
23416 {
23417 case CODE_LABEL:
23418 /* Succeed if it is the target label, otherwise fail since
23419 control falls in from somewhere else. */
23420 if (this_insn == label)
23421 {
23422 arm_ccfsm_state = 1;
23423 succeed = TRUE;
23424 }
23425 else
23426 fail = TRUE;
23427 break;
23428
23429 case BARRIER:
23430 /* Succeed if the following insn is the target label.
23431 Otherwise fail.
23432 If return insns are used then the last insn in a function
23433 will be a barrier. */
23434 this_insn = next_nonnote_insn (this_insn);
23435 if (this_insn && this_insn == label)
23436 {
23437 arm_ccfsm_state = 1;
23438 succeed = TRUE;
23439 }
23440 else
23441 fail = TRUE;
23442 break;
23443
23444 case CALL_INSN:
23445 /* The AAPCS says that conditional calls should not be
23446 used since they make interworking inefficient (the
23447 linker can't transform BL<cond> into BLX). That's
23448 only a problem if the machine has BLX. */
23449 if (arm_arch5t)
23450 {
23451 fail = TRUE;
23452 break;
23453 }
23454
23455 /* Succeed if the following insn is the target label, or
23456 if the following two insns are a barrier and the
23457 target label. */
23458 this_insn = next_nonnote_insn (this_insn);
23459 if (this_insn && BARRIER_P (this_insn))
23460 this_insn = next_nonnote_insn (this_insn);
23461
23462 if (this_insn && this_insn == label
23463 && insns_skipped < max_insns_skipped)
23464 {
23465 arm_ccfsm_state = 1;
23466 succeed = TRUE;
23467 }
23468 else
23469 fail = TRUE;
23470 break;
23471
23472 case JUMP_INSN:
23473 /* If this is an unconditional branch to the same label, succeed.
23474 If it is to another label, do nothing. If it is conditional,
23475 fail. */
23476 /* XXX Probably, the tests for SET and the PC are
23477 unnecessary. */
23478
23479 scanbody = PATTERN (this_insn);
23480 if (GET_CODE (scanbody) == SET
23481 && GET_CODE (SET_DEST (scanbody)) == PC)
23482 {
23483 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23484 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23485 {
23486 arm_ccfsm_state = 2;
23487 succeed = TRUE;
23488 }
23489 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23490 fail = TRUE;
23491 }
23492 /* Fail if a conditional return is undesirable (e.g. on a
23493 StrongARM), but still allow this if optimizing for size. */
23494 else if (GET_CODE (scanbody) == return_code
23495 && !use_return_insn (TRUE, NULL)
23496 && !optimize_size)
23497 fail = TRUE;
23498 else if (GET_CODE (scanbody) == return_code)
23499 {
23500 arm_ccfsm_state = 2;
23501 succeed = TRUE;
23502 }
23503 else if (GET_CODE (scanbody) == PARALLEL)
23504 {
23505 switch (get_attr_conds (this_insn))
23506 {
23507 case CONDS_NOCOND:
23508 break;
23509 default:
23510 fail = TRUE;
23511 break;
23512 }
23513 }
23514 else
23515 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23516
23517 break;
23518
23519 case INSN:
23520 /* Instructions using or affecting the condition codes make it
23521 fail. */
23522 scanbody = PATTERN (this_insn);
23523 if (!(GET_CODE (scanbody) == SET
23524 || GET_CODE (scanbody) == PARALLEL)
23525 || get_attr_conds (this_insn) != CONDS_NOCOND)
23526 fail = TRUE;
23527 break;
23528
23529 default:
23530 break;
23531 }
23532 }
23533 if (succeed)
23534 {
23535 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23536 arm_target_label = CODE_LABEL_NUMBER (label);
23537 else
23538 {
23539 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23540
23541 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23542 {
23543 this_insn = next_nonnote_insn (this_insn);
23544 gcc_assert (!this_insn
23545 || (!BARRIER_P (this_insn)
23546 && !LABEL_P (this_insn)));
23547 }
23548 if (!this_insn)
23549 {
23550 /* Oh, dear! We ran off the end; give up. */
23551 extract_constrain_insn_cached (insn);
23552 arm_ccfsm_state = 0;
23553 arm_target_insn = NULL;
23554 return;
23555 }
23556 arm_target_insn = this_insn;
23557 }
23558
23559 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23560 what it was. */
23561 if (!reverse)
23562 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23563
23564 if (reverse || then_not_else)
23565 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23566 }
23567
23568 /* Restore recog_data (getting the attributes of other insns can
23569 destroy this array, but final.c assumes that it remains intact
23570 across this call). */
23571 extract_constrain_insn_cached (insn);
23572 }
23573 }
23574
23575 /* Output IT instructions. */
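/* A sketch of the expected output (illustrative, not taken from the
   sources): with arm_current_cc == ARM_EQ, arm_condexec_masklen == 2 and
   arm_condexec_mask == 0x3, the loop below builds "tt" and emits
   "itt eq"; if the second instruction used the inverse condition the
   mask would be 0x1 and the output would be "ite eq" instead.  */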
23576 void
23577 thumb2_asm_output_opcode (FILE * stream)
23578 {
23579 char buff[5];
23580 int n;
23581
23582 if (arm_condexec_mask)
23583 {
23584 for (n = 0; n < arm_condexec_masklen; n++)
23585 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23586 buff[n] = 0;
23587 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23588 arm_condition_codes[arm_current_cc]);
23589 arm_condexec_mask = 0;
23590 }
23591 }
23592
23593 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23594 UNITS_PER_WORD bytes wide. */
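/* For example (a sketch): ARM_NUM_REGS (DImode) is 2, so a DImode value
   in the core registers occupies a pair such as r0/r1, while the special
   registers handled first below (e.g. CC_REGNUM) always count as a
   single register whatever the mode.  */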
23595 static unsigned int
23596 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23597 {
23598 if (TARGET_32BIT
23599 && regno > PC_REGNUM
23600 && regno != FRAME_POINTER_REGNUM
23601 && regno != ARG_POINTER_REGNUM
23602 && !IS_VFP_REGNUM (regno))
23603 return 1;
23604
23605 return ARM_NUM_REGS (mode);
23606 }
23607
23608 /* Implement TARGET_HARD_REGNO_MODE_OK. */
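/* Illustrative consequences of the checks below (a sketch): in ARM state
   with LDRD available, a DImode value is allowed in r2/r3 but rejected
   in r1 because the pair must start on an even register; Thumb-2 places
   no such restriction; and on VFP, DFmode is only allowed in registers
   that satisfy VFP_REGNO_OK_FOR_DOUBLE.  */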
23609 static bool
23610 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23611 {
23612 if (GET_MODE_CLASS (mode) == MODE_CC)
23613 return (regno == CC_REGNUM
23614 || (TARGET_HARD_FLOAT
23615 && regno == VFPCC_REGNUM));
23616
23617 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23618 return false;
23619
23620 if (TARGET_THUMB1)
23621 /* For the Thumb we only allow values bigger than SImode in
23622 registers 0 - 6, so that there is always a second low
23623 register available to hold the upper part of the value.
23624 We probably ought to ensure that the register is the
23625 start of an even numbered register pair. */
23626 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23627
23628 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23629 {
23630 if (mode == SFmode || mode == SImode)
23631 return VFP_REGNO_OK_FOR_SINGLE (regno);
23632
23633 if (mode == DFmode)
23634 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23635
23636 if (mode == HFmode)
23637 return VFP_REGNO_OK_FOR_SINGLE (regno);
23638
23639 /* VFP registers can hold HImode values. */
23640 if (mode == HImode)
23641 return VFP_REGNO_OK_FOR_SINGLE (regno);
23642
23643 if (TARGET_NEON)
23644 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23645 || (VALID_NEON_QREG_MODE (mode)
23646 && NEON_REGNO_OK_FOR_QUAD (regno))
23647 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23648 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23649 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23650 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23651 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23652
23653 return false;
23654 }
23655
23656 if (TARGET_REALLY_IWMMXT)
23657 {
23658 if (IS_IWMMXT_GR_REGNUM (regno))
23659 return mode == SImode;
23660
23661 if (IS_IWMMXT_REGNUM (regno))
23662 return VALID_IWMMXT_REG_MODE (mode);
23663 }
23664
23665 /* We allow almost any value to be stored in the general registers.
23666 Restrict doubleword quantities to even register pairs in ARM state
23667 so that we can use ldrd. Do not allow very large Neon structure
23668 opaque modes in general registers; they would use too many. */
23669 if (regno <= LAST_ARM_REGNUM)
23670 {
23671 if (ARM_NUM_REGS (mode) > 4)
23672 return false;
23673
23674 if (TARGET_THUMB2)
23675 return true;
23676
23677 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23678 }
23679
23680 if (regno == FRAME_POINTER_REGNUM
23681 || regno == ARG_POINTER_REGNUM)
23682 /* We only allow integers in the fake hard registers. */
23683 return GET_MODE_CLASS (mode) == MODE_INT;
23684
23685 return false;
23686 }
23687
23688 /* Implement TARGET_MODES_TIEABLE_P. */
23689
23690 static bool
23691 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23692 {
23693 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23694 return true;
23695
23696 /* We specifically want to allow elements of "structure" modes to
23697 be tieable to the structure. This more general condition allows
23698 other rarer situations too. */
23699 if (TARGET_NEON
23700 && (VALID_NEON_DREG_MODE (mode1)
23701 || VALID_NEON_QREG_MODE (mode1)
23702 || VALID_NEON_STRUCT_MODE (mode1))
23703 && (VALID_NEON_DREG_MODE (mode2)
23704 || VALID_NEON_QREG_MODE (mode2)
23705 || VALID_NEON_STRUCT_MODE (mode2)))
23706 return true;
23707
23708 return false;
23709 }
23710
23711 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23712 not used in arm mode. */
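/* For instance (a sketch): under Thumb-2, r0-r7 map to LO_REGS and
   r8-r12 to HI_REGS, while in ARM state they all fall into GENERAL_REGS;
   the first VFP bank (d0-d7, i.e. s0-s15) maps to VFP_D0_D7_REGS.  */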
23713
23714 enum reg_class
23715 arm_regno_class (int regno)
23716 {
23717 if (regno == PC_REGNUM)
23718 return NO_REGS;
23719
23720 if (TARGET_THUMB1)
23721 {
23722 if (regno == STACK_POINTER_REGNUM)
23723 return STACK_REG;
23724 if (regno == CC_REGNUM)
23725 return CC_REG;
23726 if (regno < 8)
23727 return LO_REGS;
23728 return HI_REGS;
23729 }
23730
23731 if (TARGET_THUMB2 && regno < 8)
23732 return LO_REGS;
23733
23734 if ( regno <= LAST_ARM_REGNUM
23735 || regno == FRAME_POINTER_REGNUM
23736 || regno == ARG_POINTER_REGNUM)
23737 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23738
23739 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23740 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23741
23742 if (IS_VFP_REGNUM (regno))
23743 {
23744 if (regno <= D7_VFP_REGNUM)
23745 return VFP_D0_D7_REGS;
23746 else if (regno <= LAST_LO_VFP_REGNUM)
23747 return VFP_LO_REGS;
23748 else
23749 return VFP_HI_REGS;
23750 }
23751
23752 if (IS_IWMMXT_REGNUM (regno))
23753 return IWMMXT_REGS;
23754
23755 if (IS_IWMMXT_GR_REGNUM (regno))
23756 return IWMMXT_GR_REGS;
23757
23758 return NO_REGS;
23759 }
23760
23761 /* Handle a special case when computing the offset
23762 of an argument from the frame pointer. */
23763 int
23764 arm_debugger_arg_offset (int value, rtx addr)
23765 {
23766 rtx_insn *insn;
23767
23768 /* We are only interested if dbxout_parms() failed to compute the offset. */
23769 if (value != 0)
23770 return 0;
23771
23772 /* We can only cope with the case where the address is held in a register. */
23773 if (!REG_P (addr))
23774 return 0;
23775
23776 /* If we are using the frame pointer to point at the argument, then
23777 an offset of 0 is correct. */
23778 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23779 return 0;
23780
23781 /* If we are using the stack pointer to point at the
23782 argument, then an offset of 0 is correct. */
23783 /* ??? Check this is consistent with thumb2 frame layout. */
23784 if ((TARGET_THUMB || !frame_pointer_needed)
23785 && REGNO (addr) == SP_REGNUM)
23786 return 0;
23787
23788 /* Oh dear. The argument is pointed to by a register rather
23789 than being held in a register, or being stored at a known
23790 offset from the frame pointer. Since GDB only understands
23791 those two kinds of argument we must translate the address
23792 held in the register into an offset from the frame pointer.
23793 We do this by searching through the insns for the function
23794 looking to see where this register gets its value. If the
23795 register is initialized from the frame pointer plus an offset
23796 then we are in luck and we can continue, otherwise we give up.
23797
23798 This code is exercised by producing debugging information
23799 for a function with arguments like this:
23800
23801 double func (double a, double b, int c, double d) {return d;}
23802
23803 Without this code the stab for parameter 'd' will be set to
23804 an offset of 0 from the frame pointer, rather than 8. */
23805
23806 /* The if() statement says:
23807
23808 If the insn is a normal instruction
23809 and if the insn is setting the value in a register
23810 and if the register being set is the register holding the address of the argument
23811 and if the address is computed by an addition
23812 that involves adding to a register
23813 which is the frame pointer
23814 a constant integer
23815
23816 then... */
23817
23818 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23819 {
23820 if ( NONJUMP_INSN_P (insn)
23821 && GET_CODE (PATTERN (insn)) == SET
23822 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23823 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23824 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23825 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23826 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23827 )
23828 {
23829 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23830
23831 break;
23832 }
23833 }
23834
23835 if (value == 0)
23836 {
23837 debug_rtx (addr);
23838 warning (0, "unable to compute real location of stacked parameter");
23839 value = 8; /* XXX magic hack */
23840 }
23841
23842 return value;
23843 }
23844 \f
23845 /* Implement TARGET_PROMOTED_TYPE. */
23846
23847 static tree
23848 arm_promoted_type (const_tree t)
23849 {
23850 if (SCALAR_FLOAT_TYPE_P (t)
23851 && TYPE_PRECISION (t) == 16
23852 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23853 return float_type_node;
23854 return NULL_TREE;
23855 }
23856
23857 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23858 This simply adds HFmode as a supported mode; even though we don't
23859 implement arithmetic on this type directly, it's supported by
23860 optabs conversions, much the way the double-word arithmetic is
23861 special-cased in the default hook. */
23862
23863 static bool
23864 arm_scalar_mode_supported_p (scalar_mode mode)
23865 {
23866 if (mode == HFmode)
23867 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23868 else if (ALL_FIXED_POINT_MODE_P (mode))
23869 return true;
23870 else
23871 return default_scalar_mode_supported_p (mode);
23872 }
23873
23874 /* Set the value of FLT_EVAL_METHOD.
23875 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23876
23877 0: evaluate all operations and constants, whose semantic type has at
23878 most the range and precision of type float, to the range and
23879 precision of float; evaluate all other operations and constants to
23880 the range and precision of the semantic type;
23881
23882 N, where _FloatN is a supported interchange floating type
23883 evaluate all operations and constants, whose semantic type has at
23884 most the range and precision of _FloatN type, to the range and
23885 precision of the _FloatN type; evaluate all other operations and
23886 constants to the range and precision of the semantic type;
23887
23888 If we have the ARMv8.2-A extensions then we support _Float16 in native
23889 precision, so we should set this to 16. Otherwise, we support the type,
23890 but want to evaluate expressions in float precision, so set this to
23891 0. */
23892
23893 static enum flt_eval_method
23894 arm_excess_precision (enum excess_precision_type type)
23895 {
23896 switch (type)
23897 {
23898 case EXCESS_PRECISION_TYPE_FAST:
23899 case EXCESS_PRECISION_TYPE_STANDARD:
23900 /* We can calculate either in 16-bit range and precision or
23901 32-bit range and precision. Make that decision based on whether
23902 we have native support for the ARMv8.2-A 16-bit floating-point
23903 instructions or not. */
23904 return (TARGET_VFP_FP16INST
23905 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23906 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23907 case EXCESS_PRECISION_TYPE_IMPLICIT:
23908 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23909 default:
23910 gcc_unreachable ();
23911 }
23912 return FLT_EVAL_METHOD_UNPREDICTABLE;
23913 }
23914
23915
23916 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23917 _Float16 if we are using anything other than ieee format for 16-bit
23918 floating point. Otherwise, punt to the default implementation. */
23919 static opt_scalar_float_mode
23920 arm_floatn_mode (int n, bool extended)
23921 {
23922 if (!extended && n == 16)
23923 {
23924 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23925 return HFmode;
23926 return opt_scalar_float_mode ();
23927 }
23928
23929 return default_floatn_mode (n, extended);
23930 }
23931
23932
23933 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23934 not to early-clobber SRC registers in the process.
23935
23936 We assume that the operands described by SRC and DEST represent a
23937 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23938 number of components into which the copy has been decomposed. */
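/* A worked example (a sketch, using D registers for concreteness):
   copying an OImode value from d1-d4 into d2-d5 decomposes into four
   D-register moves.  The ranges overlap and the destination starts above
   the source, so the operand pairs are ordered in reverse
   (d5 <- d4, d4 <- d3, d3 <- d2, d2 <- d1); a forward order would
   overwrite d2 before it had been read as a source.  */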
23939 void
23940 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23941 {
23942 unsigned int i;
23943
23944 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23945 || REGNO (operands[0]) < REGNO (operands[1]))
23946 {
23947 for (i = 0; i < count; i++)
23948 {
23949 operands[2 * i] = dest[i];
23950 operands[2 * i + 1] = src[i];
23951 }
23952 }
23953 else
23954 {
23955 for (i = 0; i < count; i++)
23956 {
23957 operands[2 * i] = dest[count - i - 1];
23958 operands[2 * i + 1] = src[count - i - 1];
23959 }
23960 }
23961 }
23962
23963 /* Split operands into moves from op[1] + op[2] into op[0]. */
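/* For instance (a sketch): if operands[0] is q0 (d0/d1) and operands[1]
   is already d0, only the high half needs a move; if the two halves are
   exactly swapped, a single parallel is emitted so the pattern can use
   VSWP.  */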
23964
23965 void
23966 neon_split_vcombine (rtx operands[3])
23967 {
23968 unsigned int dest = REGNO (operands[0]);
23969 unsigned int src1 = REGNO (operands[1]);
23970 unsigned int src2 = REGNO (operands[2]);
23971 machine_mode halfmode = GET_MODE (operands[1]);
23972 unsigned int halfregs = REG_NREGS (operands[1]);
23973 rtx destlo, desthi;
23974
23975 if (src1 == dest && src2 == dest + halfregs)
23976 {
23977 /* No-op move. Can't split to nothing; emit something. */
23978 emit_note (NOTE_INSN_DELETED);
23979 return;
23980 }
23981
23982 /* Preserve register attributes for variable tracking. */
23983 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23984 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23985 GET_MODE_SIZE (halfmode));
23986
23987 /* Special case of reversed high/low parts. Use VSWP. */
23988 if (src2 == dest && src1 == dest + halfregs)
23989 {
23990 rtx x = gen_rtx_SET (destlo, operands[1]);
23991 rtx y = gen_rtx_SET (desthi, operands[2]);
23992 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23993 return;
23994 }
23995
23996 if (!reg_overlap_mentioned_p (operands[2], destlo))
23997 {
23998 /* Try to avoid unnecessary moves if part of the result
23999 is in the right place already. */
24000 if (src1 != dest)
24001 emit_move_insn (destlo, operands[1]);
24002 if (src2 != dest + halfregs)
24003 emit_move_insn (desthi, operands[2]);
24004 }
24005 else
24006 {
24007 if (src2 != dest + halfregs)
24008 emit_move_insn (desthi, operands[2]);
24009 if (src1 != dest)
24010 emit_move_insn (destlo, operands[1]);
24011 }
24012 }
24013 \f
24014 /* Return the number (counting from 0) of
24015 the least significant set bit in MASK. */
24016
24017 inline static int
24018 number_of_first_bit_set (unsigned mask)
24019 {
24020 return ctz_hwi (mask);
24021 }
24022
24023 /* Like emit_multi_reg_push, but allowing for a different set of
24024 registers to be described as saved. MASK is the set of registers
24025 to be saved; REAL_REGS is the set of registers to be described as
24026 saved. If REAL_REGS is 0, only describe the stack adjustment. */
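/* A sketch of the effect: with MASK = REAL_REGS = {r4, r7, lr} this
   builds RTL that is output as a single "push {r4, r7, lr}" (SP
   decremented by 12) and attaches a REG_FRAME_RELATED_EXPR note
   describing the SP adjustment and the three stores at SP, SP+4 and
   SP+8 for the unwinder.  */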
24027
24028 static rtx_insn *
24029 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24030 {
24031 unsigned long regno;
24032 rtx par[10], tmp, reg;
24033 rtx_insn *insn;
24034 int i, j;
24035
24036 /* Build the parallel of the registers actually being stored. */
24037 for (i = 0; mask; ++i, mask &= mask - 1)
24038 {
24039 regno = ctz_hwi (mask);
24040 reg = gen_rtx_REG (SImode, regno);
24041
24042 if (i == 0)
24043 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24044 else
24045 tmp = gen_rtx_USE (VOIDmode, reg);
24046
24047 par[i] = tmp;
24048 }
24049
24050 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24051 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24052 tmp = gen_frame_mem (BLKmode, tmp);
24053 tmp = gen_rtx_SET (tmp, par[0]);
24054 par[0] = tmp;
24055
24056 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24057 insn = emit_insn (tmp);
24058
24059 /* Always build the stack adjustment note for unwind info. */
24060 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24061 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24062 par[0] = tmp;
24063
24064 /* Build the parallel of the registers recorded as saved for unwind. */
24065 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24066 {
24067 regno = ctz_hwi (real_regs);
24068 reg = gen_rtx_REG (SImode, regno);
24069
24070 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24071 tmp = gen_frame_mem (SImode, tmp);
24072 tmp = gen_rtx_SET (tmp, reg);
24073 RTX_FRAME_RELATED_P (tmp) = 1;
24074 par[j + 1] = tmp;
24075 }
24076
24077 if (j == 0)
24078 tmp = par[0];
24079 else
24080 {
24081 RTX_FRAME_RELATED_P (par[0]) = 1;
24082 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24083 }
24084
24085 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24086
24087 return insn;
24088 }
24089
24090 /* Emit code to push or pop registers to or from the stack. F is the
24091 assembly file. MASK is the registers to pop. */
24092 static void
24093 thumb_pop (FILE *f, unsigned long mask)
24094 {
24095 int regno;
24096 int lo_mask = mask & 0xFF;
24097
24098 gcc_assert (mask);
24099
24100 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24101 {
24102 /* Special case. Do not generate a POP PC statement here, do it in
24103 thumb_exit(). */
24104 thumb_exit (f, -1);
24105 return;
24106 }
24107
24108 fprintf (f, "\tpop\t{");
24109
24110 /* Look at the low registers first. */
24111 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24112 {
24113 if (lo_mask & 1)
24114 {
24115 asm_fprintf (f, "%r", regno);
24116
24117 if ((lo_mask & ~1) != 0)
24118 fprintf (f, ", ");
24119 }
24120 }
24121
24122 if (mask & (1 << PC_REGNUM))
24123 {
24124 /* Catch popping the PC. */
24125 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24126 || IS_CMSE_ENTRY (arm_current_func_type ()))
24127 {
24128 /* The PC is never popped directly; instead
24129 it is popped into r3 and then BX is used. */
24130 fprintf (f, "}\n");
24131
24132 thumb_exit (f, -1);
24133
24134 return;
24135 }
24136 else
24137 {
24138 if (mask & 0xFF)
24139 fprintf (f, ", ");
24140
24141 asm_fprintf (f, "%r", PC_REGNUM);
24142 }
24143 }
24144
24145 fprintf (f, "}\n");
24146 }
24147
24148 /* Generate code to return from a thumb function.
24149 If 'reg_containing_return_addr' is -1, then the return address is
24150 actually on the stack, at the stack pointer.
24151
24152 Note: do not forget to update length attribute of corresponding insn pattern
24153 when changing assembly output (eg. length attribute of epilogue_insns when
24154 updating Armv8-M Baseline Security Extensions register clearing
24155 sequences). */
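/* An illustrative sketch of the output: for a void function whose return
   address is still on the stack, the common non-interworking case
   collapses to a single "pop {pc}"; with interworking the address is
   instead popped into a free argument register and returned through BX,
   e.g. "pop {r1}" followed by "bx r1".  */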
24156 static void
24157 thumb_exit (FILE *f, int reg_containing_return_addr)
24158 {
24159 unsigned regs_available_for_popping;
24160 unsigned regs_to_pop;
24161 int pops_needed;
24162 unsigned available;
24163 unsigned required;
24164 machine_mode mode;
24165 int size;
24166 int restore_a4 = FALSE;
24167
24168 /* Compute the registers we need to pop. */
24169 regs_to_pop = 0;
24170 pops_needed = 0;
24171
24172 if (reg_containing_return_addr == -1)
24173 {
24174 regs_to_pop |= 1 << LR_REGNUM;
24175 ++pops_needed;
24176 }
24177
24178 if (TARGET_BACKTRACE)
24179 {
24180 /* Restore the (ARM) frame pointer and stack pointer. */
24181 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24182 pops_needed += 2;
24183 }
24184
24185 /* If there is nothing to pop then just emit the BX instruction and
24186 return. */
24187 if (pops_needed == 0)
24188 {
24189 if (crtl->calls_eh_return)
24190 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24191
24192 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24193 {
24194 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24195 reg_containing_return_addr);
24196 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24197 }
24198 else
24199 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24200 return;
24201 }
24202 /* Otherwise if we are not supporting interworking and we have not created
24203 a backtrace structure and the function was not entered in ARM mode then
24204 just pop the return address straight into the PC. */
24205 else if (!TARGET_INTERWORK
24206 && !TARGET_BACKTRACE
24207 && !is_called_in_ARM_mode (current_function_decl)
24208 && !crtl->calls_eh_return
24209 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24210 {
24211 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24212 return;
24213 }
24214
24215 /* Find out how many of the (return) argument registers we can corrupt. */
24216 regs_available_for_popping = 0;
24217
24218 /* If returning via __builtin_eh_return, the bottom three registers
24219 all contain information needed for the return. */
24220 if (crtl->calls_eh_return)
24221 size = 12;
24222 else
24223 {
24224 /* Deduce the registers used from the function's
24225 return value. This is more reliable than examining
24226 df_regs_ever_live_p () because that will be set if the register is
24227 ever used in the function, not just if the register is used
24228 to hold a return value. */
24229
24230 if (crtl->return_rtx != 0)
24231 mode = GET_MODE (crtl->return_rtx);
24232 else
24233 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24234
24235 size = GET_MODE_SIZE (mode);
24236
24237 if (size == 0)
24238 {
24239 /* In a void function we can use any argument register.
24240 In a function that returns a structure on the stack
24241 we can use the second and third argument registers. */
24242 if (mode == VOIDmode)
24243 regs_available_for_popping =
24244 (1 << ARG_REGISTER (1))
24245 | (1 << ARG_REGISTER (2))
24246 | (1 << ARG_REGISTER (3));
24247 else
24248 regs_available_for_popping =
24249 (1 << ARG_REGISTER (2))
24250 | (1 << ARG_REGISTER (3));
24251 }
24252 else if (size <= 4)
24253 regs_available_for_popping =
24254 (1 << ARG_REGISTER (2))
24255 | (1 << ARG_REGISTER (3));
24256 else if (size <= 8)
24257 regs_available_for_popping =
24258 (1 << ARG_REGISTER (3));
24259 }
24260
24261 /* Match registers to be popped with registers into which we pop them. */
24262 for (available = regs_available_for_popping,
24263 required = regs_to_pop;
24264 required != 0 && available != 0;
24265 available &= ~(available & - available),
24266 required &= ~(required & - required))
24267 -- pops_needed;
24268
24269 /* If we have any popping registers left over, remove them. */
24270 if (available > 0)
24271 regs_available_for_popping &= ~available;
24272
24273 /* Otherwise if we need another popping register we can use
24274 the fourth argument register. */
24275 else if (pops_needed)
24276 {
24277 /* If we have not found any free argument registers and
24278 reg a4 contains the return address, we must move it. */
24279 if (regs_available_for_popping == 0
24280 && reg_containing_return_addr == LAST_ARG_REGNUM)
24281 {
24282 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24283 reg_containing_return_addr = LR_REGNUM;
24284 }
24285 else if (size > 12)
24286 {
24287 /* Register a4 is being used to hold part of the return value,
24288 but we have dire need of a free, low register. */
24289 restore_a4 = TRUE;
24290
24291 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24292 }
24293
24294 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24295 {
24296 /* The fourth argument register is available. */
24297 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24298
24299 --pops_needed;
24300 }
24301 }
24302
24303 /* Pop as many registers as we can. */
24304 thumb_pop (f, regs_available_for_popping);
24305
24306 /* Process the registers we popped. */
24307 if (reg_containing_return_addr == -1)
24308 {
24309 /* The return address was popped into the lowest numbered register. */
24310 regs_to_pop &= ~(1 << LR_REGNUM);
24311
24312 reg_containing_return_addr =
24313 number_of_first_bit_set (regs_available_for_popping);
24314
24315 /* Remove this register from the mask of available registers, so that
24316 the return address will not be corrupted by further pops. */
24317 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24318 }
24319
24320 /* If we popped other registers then handle them here. */
24321 if (regs_available_for_popping)
24322 {
24323 int frame_pointer;
24324
24325 /* Work out which register currently contains the frame pointer. */
24326 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24327
24328 /* Move it into the correct place. */
24329 asm_fprintf (f, "\tmov\t%r, %r\n",
24330 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24331
24332 /* (Temporarily) remove it from the mask of popped registers. */
24333 regs_available_for_popping &= ~(1 << frame_pointer);
24334 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24335
24336 if (regs_available_for_popping)
24337 {
24338 int stack_pointer;
24339
24340 /* We popped the stack pointer as well,
24341 find the register that contains it. */
24342 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24343
24344 /* Move it into the stack register. */
24345 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24346
24347 /* At this point we have popped all necessary registers, so
24348 do not worry about restoring regs_available_for_popping
24349 to its correct value:
24350
24351 assert (pops_needed == 0)
24352 assert (regs_available_for_popping == (1 << frame_pointer))
24353 assert (regs_to_pop == (1 << STACK_POINTER)) */
24354 }
24355 else
24356 {
24357 /* Since we have just moved the popped value into the frame
24358 pointer, the popping register is available for reuse, and
24359 we know that we still have the stack pointer left to pop. */
24360 regs_available_for_popping |= (1 << frame_pointer);
24361 }
24362 }
24363
24364 /* If we still have registers left on the stack, but we no longer have
24365 any registers into which we can pop them, then we must move the return
24366 address into the link register and make available the register that
24367 contained it. */
24368 if (regs_available_for_popping == 0 && pops_needed > 0)
24369 {
24370 regs_available_for_popping |= 1 << reg_containing_return_addr;
24371
24372 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24373 reg_containing_return_addr);
24374
24375 reg_containing_return_addr = LR_REGNUM;
24376 }
24377
24378 /* If we have registers left on the stack then pop some more.
24379 We know that at most we will want to pop FP and SP. */
24380 if (pops_needed > 0)
24381 {
24382 int popped_into;
24383 int move_to;
24384
24385 thumb_pop (f, regs_available_for_popping);
24386
24387 /* We have popped either FP or SP.
24388 Move whichever one it is into the correct register. */
24389 popped_into = number_of_first_bit_set (regs_available_for_popping);
24390 move_to = number_of_first_bit_set (regs_to_pop);
24391
24392 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24393 --pops_needed;
24394 }
24395
24396 /* If we still have not popped everything then we must have only
24397 had one register available to us and we are now popping the SP. */
24398 if (pops_needed > 0)
24399 {
24400 int popped_into;
24401
24402 thumb_pop (f, regs_available_for_popping);
24403
24404 popped_into = number_of_first_bit_set (regs_available_for_popping);
24405
24406 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24407 /*
24408 assert (regs_to_pop == (1 << STACK_POINTER))
24409 assert (pops_needed == 1)
24410 */
24411 }
24412
24413 /* If necessary restore the a4 register. */
24414 if (restore_a4)
24415 {
24416 if (reg_containing_return_addr != LR_REGNUM)
24417 {
24418 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24419 reg_containing_return_addr = LR_REGNUM;
24420 }
24421
24422 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24423 }
24424
24425 if (crtl->calls_eh_return)
24426 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24427
24428 /* Return to caller. */
24429 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24430 {
24431 /* This is for the cases where LR is not being used to contain the return
24432 address. It may therefore contain information that we might not want
24433 to leak, hence it must be cleared. The value in R0 will never be a
24434 secret at this point, so it is safe to use it, see the clearing code
24435 in 'cmse_nonsecure_entry_clear_before_return'. */
24436 if (reg_containing_return_addr != LR_REGNUM)
24437 asm_fprintf (f, "\tmov\tlr, r0\n");
24438
24439 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24440 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24441 }
24442 else
24443 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24444 }
24445 \f
24446 /* Scan INSN just before assembler is output for it.
24447 For Thumb-1, we track the status of the condition codes; this
24448 information is used in the cbranchsi4_insn pattern. */
24449 void
24450 thumb1_final_prescan_insn (rtx_insn *insn)
24451 {
24452 if (flag_print_asm_name)
24453 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24454 INSN_ADDRESSES (INSN_UID (insn)));
24455 /* Don't overwrite the previous setter when we get to a cbranch. */
24456 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24457 {
24458 enum attr_conds conds;
24459
24460 if (cfun->machine->thumb1_cc_insn)
24461 {
24462 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24463 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24464 CC_STATUS_INIT;
24465 }
24466 conds = get_attr_conds (insn);
24467 if (conds == CONDS_SET)
24468 {
24469 rtx set = single_set (insn);
24470 cfun->machine->thumb1_cc_insn = insn;
24471 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24472 cfun->machine->thumb1_cc_op1 = const0_rtx;
24473 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24474 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24475 {
24476 rtx src1 = XEXP (SET_SRC (set), 1);
24477 if (src1 == const0_rtx)
24478 cfun->machine->thumb1_cc_mode = CCmode;
24479 }
24480 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24481 {
24482 /* Record the src register operand instead of dest because
24483 the cprop_hardreg pass propagates src. */
24484 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24485 }
24486 }
24487 else if (conds != CONDS_NOCOND)
24488 cfun->machine->thumb1_cc_insn = NULL_RTX;
24489 }
24490
24491 /* Check if unexpected far jump is used. */
24492 if (cfun->machine->lr_save_eliminated
24493 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24494 internal_error("Unexpected thumb1 far jump");
24495 }
24496
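/* Return nonzero if all the set bits of VAL fit within a single 8-bit
   window, i.e. VAL is an 8-bit constant shifted left by 0 to 24 bits;
   e.g. 0x3FC (0xFF << 2) qualifies, while 0x101 does not.  (Descriptive
   comment added as a sketch of the check below; presumably such
   constants can be built from an 8-bit immediate plus a shift.)  */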
24497 int
24498 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24499 {
24500 unsigned HOST_WIDE_INT mask = 0xff;
24501 int i;
24502
24503 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24504 if (val == 0) /* XXX */
24505 return 0;
24506
24507 for (i = 0; i < 25; i++)
24508 if ((val & (mask << i)) == val)
24509 return 1;
24510
24511 return 0;
24512 }
24513
24514 /* Returns nonzero if the current function contains,
24515 or might contain a far jump. */
24516 static int
24517 thumb_far_jump_used_p (void)
24518 {
24519 rtx_insn *insn;
24520 bool far_jump = false;
24521 unsigned int func_size = 0;
24522
24523 /* If we have already decided that far jumps may be used,
24524 do not bother checking again, and always return true even if
24525 it turns out that they are not being used. Once we have made
24526 the decision that far jumps are present (and that hence the link
24527 register will be pushed onto the stack) we cannot go back on it. */
24528 if (cfun->machine->far_jump_used)
24529 return 1;
24530
24531 /* If this function is not being called from the prologue/epilogue
24532 generation code then it must be being called from the
24533 INITIAL_ELIMINATION_OFFSET macro. */
24534 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24535 {
24536 /* In this case we know that we are being asked about the elimination
24537 of the arg pointer register. If that register is not being used,
24538 then there are no arguments on the stack, and we do not have to
24539 worry that a far jump might force the prologue to push the link
24540 register, changing the stack offsets. In this case we can just
24541 return false, since the presence of far jumps in the function will
24542 not affect stack offsets.
24543
24544 If the arg pointer is live (or if it was live, but has now been
24545 eliminated and so set to dead) then we do have to test to see if
24546 the function might contain a far jump. This test can lead to some
24547 false negatives, since before reload is completed the length of
24548 branch instructions is not known, so gcc defaults to returning their
24549 longest length, which in turn sets the far jump attribute to true.
24550
24551 A false negative will not result in bad code being generated, but it
24552 will result in a needless push and pop of the link register. We
24553 hope that this does not occur too often.
24554
24555 If we need doubleword stack alignment this could affect the other
24556 elimination offsets so we can't risk getting it wrong. */
24557 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24558 cfun->machine->arg_pointer_live = 1;
24559 else if (!cfun->machine->arg_pointer_live)
24560 return 0;
24561 }
24562
24563 /* We should not change far_jump_used during or after reload, as there is
24564 no chance to change stack frame layout. */
24565 if (reload_in_progress || reload_completed)
24566 return 0;
24567
24568 /* Check to see if the function contains a branch
24569 insn with the far jump attribute set. */
24570 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24571 {
24572 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24573 {
24574 far_jump = true;
24575 }
24576 func_size += get_attr_length (insn);
24577 }
24578
24579 /* Attribute far_jump will always be true for thumb1 before
24580 the shorten_branch pass. So checking the far_jump attribute before
24581 shorten_branch isn't very useful.
24582
24583 The following heuristic tries to estimate more accurately whether a far
24584 jump may finally be used. The heuristic is very conservative, as there is
24585 no chance to roll back the decision not to use a far jump.
24586
24587 Thumb1 long branch offset is -2048 to 2046. The worst case is that each
24588 2-byte insn is associated with a 4-byte constant pool. Using a
24589 function size of 2048/3 as the threshold is conservative enough. */
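/* In other words (a worked example): the test below fires once the
   function body reaches roughly 683 bytes, since 683 * 3 = 2049 >= 2048
   while 682 * 3 = 2046 < 2048.  */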
24590 if (far_jump)
24591 {
24592 if ((func_size * 3) >= 2048)
24593 {
24594 /* Record the fact that we have decided that
24595 the function does use far jumps. */
24596 cfun->machine->far_jump_used = 1;
24597 return 1;
24598 }
24599 }
24600
24601 return 0;
24602 }
24603
24604 /* Return nonzero if FUNC must be entered in ARM mode. */
24605 static bool
24606 is_called_in_ARM_mode (tree func)
24607 {
24608 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24609
24610 /* Ignore the problem about functions whose address is taken. */
24611 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24612 return true;
24613
24614 #ifdef ARM_PE
24615 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24616 #else
24617 return false;
24618 #endif
24619 }
24620
24621 /* Given the stack offsets and register mask in OFFSETS, decide how
24622 many additional registers to push instead of subtracting a constant
24623 from SP. For epilogues the principle is the same except we use pop.
24624 FOR_PROLOGUE indicates which we're generating. */
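/* A worked example (a sketch): a frame needing exactly 512 bytes with
   one suitable free low register returns 1; pushing that one extra
   register leaves a 508-byte adjustment, which fits the
   single-instruction Thumb-1 "sub sp, #imm" limit of 508.  */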
24625 static int
24626 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24627 {
24628 HOST_WIDE_INT amount;
24629 unsigned long live_regs_mask = offsets->saved_regs_mask;
24630 /* Extract a mask of the ones we can give to the Thumb's push/pop
24631 instruction. */
24632 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24633 /* Then count how many other high registers will need to be pushed. */
24634 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24635 int n_free, reg_base, size;
24636
24637 if (!for_prologue && frame_pointer_needed)
24638 amount = offsets->locals_base - offsets->saved_regs;
24639 else
24640 amount = offsets->outgoing_args - offsets->saved_regs;
24641
24642 /* If the stack frame size is 512 exactly, we can save one load
24643 instruction, which should make this a win even when optimizing
24644 for speed. */
24645 if (!optimize_size && amount != 512)
24646 return 0;
24647
24648 /* Can't do this if there are high registers to push. */
24649 if (high_regs_pushed != 0)
24650 return 0;
24651
24652 /* Shouldn't do it in the prologue if no registers would normally
24653 be pushed at all. In the epilogue, also allow it if we'll have
24654 a pop insn for the PC. */
24655 if (l_mask == 0
24656 && (for_prologue
24657 || TARGET_BACKTRACE
24658 || (live_regs_mask & 1 << LR_REGNUM) == 0
24659 || TARGET_INTERWORK
24660 || crtl->args.pretend_args_size != 0))
24661 return 0;
24662
24663 /* Don't do this if thumb_expand_prologue wants to emit instructions
24664 between the push and the stack frame allocation. */
24665 if (for_prologue
24666 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24667 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24668 return 0;
24669
24670 reg_base = 0;
24671 n_free = 0;
24672 if (!for_prologue)
24673 {
24674 size = arm_size_return_regs ();
24675 reg_base = ARM_NUM_INTS (size);
24676 live_regs_mask >>= reg_base;
24677 }
24678
24679 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24680 && (for_prologue || call_used_regs[reg_base + n_free]))
24681 {
24682 live_regs_mask >>= 1;
24683 n_free++;
24684 }
24685
24686 if (n_free == 0)
24687 return 0;
24688 gcc_assert (amount / 4 * 4 == amount);
24689
24690 if (amount >= 512 && (amount - n_free * 4) < 512)
24691 return (amount - 508) / 4;
24692 if (amount <= n_free * 4)
24693 return amount / 4;
24694 return 0;
24695 }
24696
24697 /* The bits which aren't usefully expanded as rtl. */
24698 const char *
24699 thumb1_unexpanded_epilogue (void)
24700 {
24701 arm_stack_offsets *offsets;
24702 int regno;
24703 unsigned long live_regs_mask = 0;
24704 int high_regs_pushed = 0;
24705 int extra_pop;
24706 int had_to_push_lr;
24707 int size;
24708
24709 if (cfun->machine->return_used_this_function != 0)
24710 return "";
24711
24712 if (IS_NAKED (arm_current_func_type ()))
24713 return "";
24714
24715 offsets = arm_get_frame_offsets ();
24716 live_regs_mask = offsets->saved_regs_mask;
24717 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24718
24719 /* Deduce the registers used from the function's return value.
24720 This is more reliable than examining df_regs_ever_live_p () because that
24721 will be set if the register is ever used in the function, not just if
24722 the register is used to hold a return value. */
24723 size = arm_size_return_regs ();
24724
24725 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24726 if (extra_pop > 0)
24727 {
24728 unsigned long extra_mask = (1 << extra_pop) - 1;
24729 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24730 }
24731
24732 /* The prolog may have pushed some high registers to use as
24733 work registers; e.g. the testsuite file:
24734 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24735 compiles to produce:
24736 push {r4, r5, r6, r7, lr}
24737 mov r7, r9
24738 mov r6, r8
24739 push {r6, r7}
24740 as part of the prolog. We have to undo that pushing here. */
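/* (A sketch of the matching restore sequence, continuing the example
   above: pop the saved values into free low registers and move them back
   up, e.g. "pop {r2, r3}; mov r8, r2; mov r9, r3".)  */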
24741
24742 if (high_regs_pushed)
24743 {
24744 unsigned long mask = live_regs_mask & 0xff;
24745 int next_hi_reg;
24746
24747 /* The available low registers depend on the size of the value we are
24748 returning. */
24749 if (size <= 12)
24750 mask |= 1 << 3;
24751 if (size <= 8)
24752 mask |= 1 << 2;
24753
24754 if (mask == 0)
24755 /* Oh dear! We have no low registers into which we can pop
24756 high registers! */
24757 internal_error
24758 ("no low registers available for popping high registers");
24759
24760 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24761 if (live_regs_mask & (1 << next_hi_reg))
24762 break;
24763
24764 while (high_regs_pushed)
24765 {
24766 /* Find lo register(s) into which the high register(s) can
24767 be popped. */
24768 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24769 {
24770 if (mask & (1 << regno))
24771 high_regs_pushed--;
24772 if (high_regs_pushed == 0)
24773 break;
24774 }
24775
24776 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24777
24778 /* Pop the values into the low register(s). */
24779 thumb_pop (asm_out_file, mask);
24780
24781 /* Move the value(s) into the high registers. */
24782 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24783 {
24784 if (mask & (1 << regno))
24785 {
24786 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24787 regno);
24788
24789 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24790 if (live_regs_mask & (1 << next_hi_reg))
24791 break;
24792 }
24793 }
24794 }
24795 live_regs_mask &= ~0x0f00;
24796 }
24797
24798 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24799 live_regs_mask &= 0xff;
24800
24801 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24802 {
24803 /* Pop the return address into the PC. */
24804 if (had_to_push_lr)
24805 live_regs_mask |= 1 << PC_REGNUM;
24806
24807 /* Either no argument registers were pushed or a backtrace
24808 structure was created which includes an adjusted stack
24809 pointer, so just pop everything. */
24810 if (live_regs_mask)
24811 thumb_pop (asm_out_file, live_regs_mask);
24812
24813 /* We have either just popped the return address into the
24814 PC or it was kept in LR for the entire function.
24815 Note that thumb_pop has already called thumb_exit if the
24816 PC was in the list. */
24817 if (!had_to_push_lr)
24818 thumb_exit (asm_out_file, LR_REGNUM);
24819 }
24820 else
24821 {
24822 /* Pop everything but the return address. */
24823 if (live_regs_mask)
24824 thumb_pop (asm_out_file, live_regs_mask);
24825
24826 if (had_to_push_lr)
24827 {
24828 if (size > 12)
24829 {
24830 /* We have no free low regs, so save one. */
24831 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24832 LAST_ARG_REGNUM);
24833 }
24834
24835 /* Get the return address into a temporary register. */
24836 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24837
24838 if (size > 12)
24839 {
24840 /* Move the return address to lr. */
24841 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24842 LAST_ARG_REGNUM);
24843 /* Restore the low register. */
24844 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24845 IP_REGNUM);
24846 regno = LR_REGNUM;
24847 }
24848 else
24849 regno = LAST_ARG_REGNUM;
24850 }
24851 else
24852 regno = LR_REGNUM;
24853
24854 /* Remove the argument registers that were pushed onto the stack. */
24855 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24856 SP_REGNUM, SP_REGNUM,
24857 crtl->args.pretend_args_size);
24858
24859 thumb_exit (asm_out_file, regno);
24860 }
24861
24862 return "";
24863 }
24864
24865 /* Functions to save and restore machine-specific function data. */
24866 static struct machine_function *
24867 arm_init_machine_status (void)
24868 {
24869 struct machine_function *machine;
24870 machine = ggc_cleared_alloc<machine_function> ();
24871
24872 #if ARM_FT_UNKNOWN != 0
24873 machine->func_type = ARM_FT_UNKNOWN;
24874 #endif
24875 machine->static_chain_stack_bytes = -1;
24876 return machine;
24877 }
24878
24879 /* Return an RTX indicating where the return address to the
24880 calling function can be found. */
24881 rtx
24882 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24883 {
24884 if (count != 0)
24885 return NULL_RTX;
24886
24887 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24888 }
24889
24890 /* Do anything needed before RTL is emitted for each function. */
24891 void
24892 arm_init_expanders (void)
24893 {
24894 /* Arrange to initialize and mark the machine per-function status. */
24895 init_machine_status = arm_init_machine_status;
24896
24897 /* This is to stop the combine pass optimizing away the alignment
24898 adjustment of va_arg. */
24899 /* ??? It is claimed that this should not be necessary. */
24900 if (cfun)
24901 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24902 }
24903
24904 /* Check that FUNC is called with a different mode. */
24905
24906 bool
24907 arm_change_mode_p (tree func)
24908 {
24909 if (TREE_CODE (func) != FUNCTION_DECL)
24910 return false;
24911
24912 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24913
24914 if (!callee_tree)
24915 callee_tree = target_option_default_node;
24916
24917 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24918 int flags = callee_opts->x_target_flags;
24919
24920 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24921 }
24922
24923 /* Like arm_compute_initial_elimination offset. Simpler because there
24924 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24925 to point at the base of the local variables after static stack
24926 space for a function has been allocated. */
24927
24928 HOST_WIDE_INT
24929 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24930 {
24931 arm_stack_offsets *offsets;
24932
24933 offsets = arm_get_frame_offsets ();
24934
24935 switch (from)
24936 {
24937 case ARG_POINTER_REGNUM:
24938 switch (to)
24939 {
24940 case STACK_POINTER_REGNUM:
24941 return offsets->outgoing_args - offsets->saved_args;
24942
24943 case FRAME_POINTER_REGNUM:
24944 return offsets->soft_frame - offsets->saved_args;
24945
24946 case ARM_HARD_FRAME_POINTER_REGNUM:
24947 return offsets->saved_regs - offsets->saved_args;
24948
24949 case THUMB_HARD_FRAME_POINTER_REGNUM:
24950 return offsets->locals_base - offsets->saved_args;
24951
24952 default:
24953 gcc_unreachable ();
24954 }
24955 break;
24956
24957 case FRAME_POINTER_REGNUM:
24958 switch (to)
24959 {
24960 case STACK_POINTER_REGNUM:
24961 return offsets->outgoing_args - offsets->soft_frame;
24962
24963 case ARM_HARD_FRAME_POINTER_REGNUM:
24964 return offsets->saved_regs - offsets->soft_frame;
24965
24966 case THUMB_HARD_FRAME_POINTER_REGNUM:
24967 return offsets->locals_base - offsets->soft_frame;
24968
24969 default:
24970 gcc_unreachable ();
24971 }
24972 break;
24973
24974 default:
24975 gcc_unreachable ();
24976 }
24977 }
24978
24979 /* Generate the function's prologue. */
24980
24981 void
24982 thumb1_expand_prologue (void)
24983 {
24984 rtx_insn *insn;
24985
24986 HOST_WIDE_INT amount;
24987 HOST_WIDE_INT size;
24988 arm_stack_offsets *offsets;
24989 unsigned long func_type;
24990 int regno;
24991 unsigned long live_regs_mask;
24992 unsigned long l_mask;
24993 unsigned high_regs_pushed = 0;
24994 bool lr_needs_saving;
24995
24996 func_type = arm_current_func_type ();
24997
24998 /* Naked functions don't have prologues. */
24999 if (IS_NAKED (func_type))
25000 {
25001 if (flag_stack_usage_info)
25002 current_function_static_stack_size = 0;
25003 return;
25004 }
25005
25006 if (IS_INTERRUPT (func_type))
25007 {
25008 error ("interrupt Service Routines cannot be coded in Thumb mode");
25009 return;
25010 }
25011
25012 if (is_called_in_ARM_mode (current_function_decl))
25013 emit_insn (gen_prologue_thumb1_interwork ());
25014
25015 offsets = arm_get_frame_offsets ();
25016 live_regs_mask = offsets->saved_regs_mask;
25017 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25018
25019 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25020 l_mask = live_regs_mask & 0x40ff;
25021 /* Then count how many other high registers will need to be pushed. */
25022 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25023
25024 if (crtl->args.pretend_args_size)
25025 {
25026 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25027
25028 if (cfun->machine->uses_anonymous_args)
25029 {
25030 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25031 unsigned long mask;
25032
25033 mask = 1ul << (LAST_ARG_REGNUM + 1);
25034 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25035
25036 insn = thumb1_emit_multi_reg_push (mask, 0);
25037 }
25038 else
25039 {
25040 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25041 stack_pointer_rtx, x));
25042 }
25043 RTX_FRAME_RELATED_P (insn) = 1;
25044 }
25045
25046 if (TARGET_BACKTRACE)
25047 {
25048 HOST_WIDE_INT offset = 0;
25049 unsigned work_register;
25050 rtx work_reg, x, arm_hfp_rtx;
25051
25052 /* We have been asked to create a stack backtrace structure.
25053 The code looks like this:
25054
25055 0 .align 2
25056 0 func:
25057 0 sub SP, #16 Reserve space for 4 registers.
25058 2 push {R7} Push low registers.
25059 4 add R7, SP, #20 Get the stack pointer before the push.
25060 6 str R7, [SP, #8] Store the stack pointer
25061 (before reserving the space).
25062 8 mov R7, PC Get hold of the start of this code + 12.
25063 10 str R7, [SP, #16] Store it.
25064 12 mov R7, FP Get hold of the current frame pointer.
25065 14 str R7, [SP, #4] Store it.
25066 16 mov R7, LR Get hold of the current return address.
25067 18 str R7, [SP, #12] Store it.
25068 20 add R7, SP, #16 Point at the start of the
25069 backtrace structure.
25070 22 mov FP, R7 Put this value into the frame pointer. */
25071
25072 work_register = thumb_find_work_register (live_regs_mask);
25073 work_reg = gen_rtx_REG (SImode, work_register);
25074 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25075
25076 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25077 stack_pointer_rtx, GEN_INT (-16)));
25078 RTX_FRAME_RELATED_P (insn) = 1;
25079
25080 if (l_mask)
25081 {
25082 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25083 RTX_FRAME_RELATED_P (insn) = 1;
25084 lr_needs_saving = false;
25085
25086 offset = bit_count (l_mask) * UNITS_PER_WORD;
25087 }
25088
25089 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25090 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25091
25092 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25093 x = gen_frame_mem (SImode, x);
25094 emit_move_insn (x, work_reg);
25095
25096 /* Make sure that the instruction fetching the PC is in the right place
25097 to calculate "start of backtrace creation code + 12". */
25098 /* ??? The stores using the common WORK_REG ought to be enough to
25099 prevent the scheduler from doing anything weird. Failing that
25100 we could always move all of the following into an UNSPEC_VOLATILE. */
25101 if (l_mask)
25102 {
25103 x = gen_rtx_REG (SImode, PC_REGNUM);
25104 emit_move_insn (work_reg, x);
25105
25106 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25107 x = gen_frame_mem (SImode, x);
25108 emit_move_insn (x, work_reg);
25109
25110 emit_move_insn (work_reg, arm_hfp_rtx);
25111
25112 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25113 x = gen_frame_mem (SImode, x);
25114 emit_move_insn (x, work_reg);
25115 }
25116 else
25117 {
25118 emit_move_insn (work_reg, arm_hfp_rtx);
25119
25120 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25121 x = gen_frame_mem (SImode, x);
25122 emit_move_insn (x, work_reg);
25123
25124 x = gen_rtx_REG (SImode, PC_REGNUM);
25125 emit_move_insn (work_reg, x);
25126
25127 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25128 x = gen_frame_mem (SImode, x);
25129 emit_move_insn (x, work_reg);
25130 }
25131
25132 x = gen_rtx_REG (SImode, LR_REGNUM);
25133 emit_move_insn (work_reg, x);
25134
25135 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25136 x = gen_frame_mem (SImode, x);
25137 emit_move_insn (x, work_reg);
25138
25139 x = GEN_INT (offset + 12);
25140 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25141
25142 emit_move_insn (arm_hfp_rtx, work_reg);
25143 }
25144 /* Optimization: If we are not pushing any low registers but we are going
25145 to push some high registers then delay our first push. This will just
25146 be a push of LR and we can combine it with the push of the first high
25147 register. */
25148 else if ((l_mask & 0xff) != 0
25149 || (high_regs_pushed == 0 && lr_needs_saving))
25150 {
25151 unsigned long mask = l_mask;
25152 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25153 insn = thumb1_emit_multi_reg_push (mask, mask);
25154 RTX_FRAME_RELATED_P (insn) = 1;
25155 lr_needs_saving = false;
25156 }
25157
25158 if (high_regs_pushed)
25159 {
25160 unsigned pushable_regs;
25161 unsigned next_hi_reg;
25162 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25163 : crtl->args.info.nregs;
25164 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25165
25166 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25167 if (live_regs_mask & (1 << next_hi_reg))
25168 break;
25169
25170 	      /* Here we need to mask out registers used for passing arguments
25171 		 even if they can be pushed.  This is to avoid using them to stash
25172 		 the high registers: such a stash could clobber argument values that are still live.  */
25173 pushable_regs = l_mask & (~arg_regs_mask);
25174 if (lr_needs_saving)
25175 pushable_regs &= ~(1 << LR_REGNUM);
25176
25177 if (pushable_regs == 0)
25178 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
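      /* The loop below stages each live high register through one of the
	 pushable low registers and pushes it from there.  Roughly, with r8
	 and r9 live and only r4 usable, it emits:
		mov	r4, r9
		push	{r4}
		mov	r4, r8
		push	{r4}  */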
25179
25180 while (high_regs_pushed > 0)
25181 {
25182 unsigned long real_regs_mask = 0;
25183 unsigned long push_mask = 0;
25184
25185 for (regno = LR_REGNUM; regno >= 0; regno --)
25186 {
25187 if (pushable_regs & (1 << regno))
25188 {
25189 emit_move_insn (gen_rtx_REG (SImode, regno),
25190 gen_rtx_REG (SImode, next_hi_reg));
25191
25192 high_regs_pushed --;
25193 real_regs_mask |= (1 << next_hi_reg);
25194 push_mask |= (1 << regno);
25195
25196 if (high_regs_pushed)
25197 {
25198 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25199 next_hi_reg --)
25200 if (live_regs_mask & (1 << next_hi_reg))
25201 break;
25202 }
25203 else
25204 break;
25205 }
25206 }
25207
25208 /* If we had to find a work register and we have not yet
25209 saved the LR then add it to the list of regs to push. */
25210 if (lr_needs_saving)
25211 {
25212 push_mask |= 1 << LR_REGNUM;
25213 real_regs_mask |= 1 << LR_REGNUM;
25214 lr_needs_saving = false;
25215 }
25216
25217 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25218 RTX_FRAME_RELATED_P (insn) = 1;
25219 }
25220 }
25221
25222 /* Load the pic register before setting the frame pointer,
25223 so we can use r7 as a temporary work register. */
25224 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25225 arm_load_pic_register (live_regs_mask);
25226
25227 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25228 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25229 stack_pointer_rtx);
25230
25231 size = offsets->outgoing_args - offsets->saved_args;
25232 if (flag_stack_usage_info)
25233 current_function_static_stack_size = size;
25234
25235 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25236 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25237 || flag_stack_clash_protection)
25238 && size)
25239 	    sorry ("%<-fstack-check=specific%> for Thumb-1");
25240
25241 amount = offsets->outgoing_args - offsets->saved_regs;
25242 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25243 if (amount)
25244 {
25245 if (amount < 512)
25246 {
25247 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25248 GEN_INT (- amount)));
25249 RTX_FRAME_RELATED_P (insn) = 1;
25250 }
25251 else
25252 {
25253 rtx reg, dwarf;
25254
25255 /* The stack decrement is too big for an immediate value in a single
25256 insn. In theory we could issue multiple subtracts, but after
25257 	     three of them it becomes more space-efficient to place the full
25258 	     value in the constant pool and load it into a register.  (Also the
25259 ARM debugger really likes to see only one stack decrement per
25260 function). So instead we look for a scratch register into which
25261 we can load the decrement, and then we subtract this from the
25262 	     stack pointer.  Unfortunately, on Thumb the only available
25263 scratch registers are the argument registers, and we cannot use
25264 these as they may hold arguments to the function. Instead we
25265 	     attempt to locate a call-preserved register that is used by this
25266 function. If we can find one, then we know that it will have
25267 been pushed at the start of the prologue and so we can corrupt
25268 it now. */
25269 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25270 if (live_regs_mask & (1 << regno))
25271 break;
25272
25273 	  gcc_assert (regno <= LAST_LO_REGNUM);
25274
25275 reg = gen_rtx_REG (SImode, regno);
25276
25277 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25278
25279 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25280 stack_pointer_rtx, reg));
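	  /* Illustrative expansion, assuming r4 was the call-preserved
	     register found above: the two insns typically assemble to a
	     literal-pool load plus a register add, roughly
		ldr	r4, .Lc		@ .Lc: .word -<amount>
		add	sp, r4  */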
25281
25282 dwarf = gen_rtx_SET (stack_pointer_rtx,
25283 plus_constant (Pmode, stack_pointer_rtx,
25284 -amount));
25285 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25286 RTX_FRAME_RELATED_P (insn) = 1;
25287 }
25288 }
25289
25290 if (frame_pointer_needed)
25291 thumb_set_frame_pointer (offsets);
25292
25293 /* If we are profiling, make sure no instructions are scheduled before
25294 the call to mcount. Similarly if the user has requested no
25295 scheduling in the prolog. Similarly if we want non-call exceptions
25296 using the EABI unwinder, to prevent faulting instructions from being
25297 swapped with a stack adjustment. */
25298 if (crtl->profile || !TARGET_SCHED_PROLOG
25299 || (arm_except_unwind_info (&global_options) == UI_TARGET
25300 && cfun->can_throw_non_call_exceptions))
25301 emit_insn (gen_blockage ());
25302
25303 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25304 if (live_regs_mask & 0xff)
25305 cfun->machine->lr_save_eliminated = 0;
25306 }
25307
25308 /* Clear caller saved registers not used to pass return values and leaked
25309 condition flags before exiting a cmse_nonsecure_entry function. */
25310
25311 void
25312 cmse_nonsecure_entry_clear_before_return (void)
25313 {
25314 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25315 uint32_t padding_bits_to_clear = 0;
25316 auto_sbitmap to_clear_bitmap (maxregno + 1);
25317 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25318 tree result_type;
25319
25320 bitmap_clear (to_clear_bitmap);
25321 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25322 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25323
25324 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25325 registers. */
25326 if (TARGET_HARD_FLOAT)
25327 {
25328 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25329
25330 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25331
25332 /* Make sure we don't clear the two scratch registers used to clear the
25333 relevant FPSCR bits in output_return_instruction. */
25334 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25335 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25336 emit_use (gen_rtx_REG (SImode, 4));
25337 bitmap_clear_bit (to_clear_bitmap, 4);
25338 }
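  /* At this point to_clear_bitmap should hold r0-r3 plus s0-s15 on hard-float
     targets (ip and r4 being kept as the FPSCR-clearing scratch registers),
     or r0-r3 plus ip for soft-float.  The code below adds any user-forced
     caller-saved registers and then drops the registers carrying the return
     value.  */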
25339
25340 /* If the user has defined registers to be caller saved, these are no longer
25341 restored by the function before returning and must thus be cleared for
25342 security purposes. */
25343 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25344 {
25345 /* We do not touch registers that can be used to pass arguments as per
25346 the AAPCS, since these should never be made callee-saved by user
25347 options. */
25348 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25349 continue;
25350 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25351 continue;
25352 if (call_used_regs[regno])
25353 bitmap_set_bit (to_clear_bitmap, regno);
25354 }
25355
25356 /* Make sure we do not clear the registers used to return the result in. */
25357 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25358 if (!VOID_TYPE_P (result_type))
25359 {
25360 uint64_t to_clear_return_mask;
25361 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25362
25363 /* No need to check that we return in registers, because we don't
25364 support returning on stack yet. */
25365 gcc_assert (REG_P (result_rtl));
25366 to_clear_return_mask
25367 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25368 &padding_bits_to_clear);
25369 if (to_clear_return_mask)
25370 {
25371 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25372 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25373 {
25374 if (to_clear_return_mask & (1ULL << regno))
25375 bitmap_clear_bit (to_clear_bitmap, regno);
25376 }
25377 }
25378 }
25379
25380 if (padding_bits_to_clear != 0)
25381 {
25382 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25383 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25384
25385 	      /* Padding_bits_to_clear is nonzero, so we know we are dealing with
25386 		 returning a composite type, which only uses r0.  Let's make sure that
25387 		 r1-r3 are cleared too.  */
25388 bitmap_clear (to_clear_arg_regs_bitmap);
25389 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25390 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25391 }
25392
25393 /* Clear full registers that leak before returning. */
25394 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25395 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25396 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25397 clearing_reg);
25398 }
25399
25400 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25401    single POP instruction can be generated.  LR should be replaced by PC.  All
25402    the checks required are already done by USE_RETURN_INSN ().  Hence, all we
25403    really need to check here is whether a single register or multiple
25404    registers are to be returned.  */
25405 void
25406 thumb2_expand_return (bool simple_return)
25407 {
25408 int i, num_regs;
25409 unsigned long saved_regs_mask;
25410 arm_stack_offsets *offsets;
25411
25412 offsets = arm_get_frame_offsets ();
25413 saved_regs_mask = offsets->saved_regs_mask;
25414
25415 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25416 if (saved_regs_mask & (1 << i))
25417 num_regs++;
25418
25419 if (!simple_return && saved_regs_mask)
25420 {
25421 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25422 	 functions, or adapt the code to handle it according to the ACLE.  This path
25423 	 should not be reachable for cmse_nonsecure_entry functions, though we prefer
25424 to assert it for now to ensure that future code changes do not silently
25425 change this behavior. */
25426 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25427 if (num_regs == 1)
25428 {
25429 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25430 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25431 rtx addr = gen_rtx_MEM (SImode,
25432 gen_rtx_POST_INC (SImode,
25433 stack_pointer_rtx));
25434 set_mem_alias_set (addr, get_frame_alias_set ());
25435 XVECEXP (par, 0, 0) = ret_rtx;
25436 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25437 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25438 emit_jump_insn (par);
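	  /* The PARALLEL above pops the single saved word straight into PC,
	     so it is in effect a lone "pop {pc}" that both restores the stack
	     and returns.  */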
25439 }
25440 else
25441 {
25442 saved_regs_mask &= ~ (1 << LR_REGNUM);
25443 saved_regs_mask |= (1 << PC_REGNUM);
25444 arm_emit_multi_reg_pop (saved_regs_mask);
25445 }
25446 }
25447 else
25448 {
25449 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25450 cmse_nonsecure_entry_clear_before_return ();
25451 emit_jump_insn (simple_return_rtx);
25452 }
25453 }
25454
25455 void
25456 thumb1_expand_epilogue (void)
25457 {
25458 HOST_WIDE_INT amount;
25459 arm_stack_offsets *offsets;
25460 int regno;
25461
25462   /* Naked functions don't have epilogues.  */
25463 if (IS_NAKED (arm_current_func_type ()))
25464 return;
25465
25466 offsets = arm_get_frame_offsets ();
25467 amount = offsets->outgoing_args - offsets->saved_regs;
25468
25469 if (frame_pointer_needed)
25470 {
25471 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25472 amount = offsets->locals_base - offsets->saved_regs;
25473 }
25474 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25475
25476 gcc_assert (amount >= 0);
25477 if (amount)
25478 {
25479 emit_insn (gen_blockage ());
25480
25481 if (amount < 512)
25482 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25483 GEN_INT (amount)));
25484 else
25485 {
25486 /* r3 is always free in the epilogue. */
25487 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25488
25489 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25490 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25491 }
25492 }
25493
25494 /* Emit a USE (stack_pointer_rtx), so that
25495 the stack adjustment will not be deleted. */
25496 emit_insn (gen_force_register_use (stack_pointer_rtx));
25497
25498 if (crtl->profile || !TARGET_SCHED_PROLOG)
25499 emit_insn (gen_blockage ());
25500
25501 /* Emit a clobber for each insn that will be restored in the epilogue,
25502 so that flow2 will get register lifetimes correct. */
25503 for (regno = 0; regno < 13; regno++)
25504 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25505 emit_clobber (gen_rtx_REG (SImode, regno));
25506
25507 if (! df_regs_ever_live_p (LR_REGNUM))
25508 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25509
25510 /* Clear all caller-saved regs that are not used to return. */
25511 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25512 cmse_nonsecure_entry_clear_before_return ();
25513 }
25514
25515 /* Epilogue code for APCS frame. */
25516 static void
25517 arm_expand_epilogue_apcs_frame (bool really_return)
25518 {
25519 unsigned long func_type;
25520 unsigned long saved_regs_mask;
25521 int num_regs = 0;
25522 int i;
25523 int floats_from_frame = 0;
25524 arm_stack_offsets *offsets;
25525
25526 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25527 func_type = arm_current_func_type ();
25528
25529 /* Get frame offsets for ARM. */
25530 offsets = arm_get_frame_offsets ();
25531 saved_regs_mask = offsets->saved_regs_mask;
25532
25533 /* Find the offset of the floating-point save area in the frame. */
25534 floats_from_frame
25535 = (offsets->saved_args
25536 + arm_compute_static_chain_stack_bytes ()
25537 - offsets->frame);
25538
25539   /* Compute how many core registers are saved and how far away the floats are.  */
25540 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25541 if (saved_regs_mask & (1 << i))
25542 {
25543 num_regs++;
25544 floats_from_frame += 4;
25545 }
25546
25547 if (TARGET_HARD_FLOAT)
25548 {
25549 int start_reg;
25550 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25551
25552 /* The offset is from IP_REGNUM. */
25553 int saved_size = arm_get_vfp_saved_size ();
25554 if (saved_size > 0)
25555 {
25556 rtx_insn *insn;
25557 floats_from_frame += saved_size;
25558 insn = emit_insn (gen_addsi3 (ip_rtx,
25559 hard_frame_pointer_rtx,
25560 GEN_INT (-floats_from_frame)));
25561 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25562 ip_rtx, hard_frame_pointer_rtx);
25563 }
25564
25565 /* Generate VFP register multi-pop. */
25566 start_reg = FIRST_VFP_REGNUM;
25567
25568 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25569 /* Look for a case where a reg does not need restoring. */
25570 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25571 && (!df_regs_ever_live_p (i + 1)
25572 || call_used_regs[i + 1]))
25573 {
25574 if (start_reg != i)
25575 arm_emit_vfp_multi_reg_pop (start_reg,
25576 (i - start_reg) / 2,
25577 gen_rtx_REG (SImode,
25578 IP_REGNUM));
25579 start_reg = i + 2;
25580 }
25581
25582 /* Restore the remaining regs that we have discovered (or possibly
25583 even all of them, if the conditional in the for loop never
25584 fired). */
25585 if (start_reg != i)
25586 arm_emit_vfp_multi_reg_pop (start_reg,
25587 (i - start_reg) / 2,
25588 gen_rtx_REG (SImode, IP_REGNUM));
25589 }
25590
25591 if (TARGET_IWMMXT)
25592 {
25593 /* The frame pointer is guaranteed to be non-double-word aligned, as
25594 it is set to double-word-aligned old_stack_pointer - 4. */
25595 rtx_insn *insn;
25596 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25597
25598 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25599 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25600 {
25601 rtx addr = gen_frame_mem (V2SImode,
25602 plus_constant (Pmode, hard_frame_pointer_rtx,
25603 - lrm_count * 4));
25604 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25605 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25606 gen_rtx_REG (V2SImode, i),
25607 NULL_RTX);
25608 lrm_count += 2;
25609 }
25610 }
25611
25612   /* saved_regs_mask should contain IP, which holds the old stack pointer
25613      saved when the activation record was created.  Since SP and IP are adjacent
25614      registers, we can restore the value directly into SP.  */
25615 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25616 saved_regs_mask &= ~(1 << IP_REGNUM);
25617 saved_regs_mask |= (1 << SP_REGNUM);
25618
25619 /* There are two registers left in saved_regs_mask - LR and PC. We
25620 only need to restore LR (the return address), but to
25621 save time we can load it directly into PC, unless we need a
25622 special function exit sequence, or we are not really returning. */
25623 if (really_return
25624 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25625 && !crtl->calls_eh_return)
25626 /* Delete LR from the register mask, so that LR on
25627 the stack is loaded into the PC in the register mask. */
25628 saved_regs_mask &= ~(1 << LR_REGNUM);
25629 else
25630 saved_regs_mask &= ~(1 << PC_REGNUM);
25631
25632 num_regs = bit_count (saved_regs_mask);
25633 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25634 {
25635 rtx_insn *insn;
25636 emit_insn (gen_blockage ());
25637 /* Unwind the stack to just below the saved registers. */
25638 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25639 hard_frame_pointer_rtx,
25640 GEN_INT (- 4 * num_regs)));
25641
25642 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25643 stack_pointer_rtx, hard_frame_pointer_rtx);
25644 }
25645
25646 arm_emit_multi_reg_pop (saved_regs_mask);
25647
25648 if (IS_INTERRUPT (func_type))
25649 {
25650 /* Interrupt handlers will have pushed the
25651 IP onto the stack, so restore it now. */
25652 rtx_insn *insn;
25653 rtx addr = gen_rtx_MEM (SImode,
25654 gen_rtx_POST_INC (SImode,
25655 stack_pointer_rtx));
25656 set_mem_alias_set (addr, get_frame_alias_set ());
25657 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25658 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25659 gen_rtx_REG (SImode, IP_REGNUM),
25660 NULL_RTX);
25661 }
25662
25663 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25664 return;
25665
25666 if (crtl->calls_eh_return)
25667 emit_insn (gen_addsi3 (stack_pointer_rtx,
25668 stack_pointer_rtx,
25669 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25670
25671 if (IS_STACKALIGN (func_type))
25672 /* Restore the original stack pointer. Before prologue, the stack was
25673 realigned and the original stack pointer saved in r0. For details,
25674 see comment in arm_expand_prologue. */
25675 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25676
25677 emit_jump_insn (simple_return_rtx);
25678 }
25679
25680 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25681 function is not a sibcall. */
25682 void
25683 arm_expand_epilogue (bool really_return)
25684 {
25685 unsigned long func_type;
25686 unsigned long saved_regs_mask;
25687 int num_regs = 0;
25688 int i;
25689 int amount;
25690 arm_stack_offsets *offsets;
25691
25692 func_type = arm_current_func_type ();
25693
25694   /* Naked functions don't have epilogues.  Hence, generate a return pattern and
25695      let output_return_instruction take care of any instruction emission.  */
25696 if (IS_NAKED (func_type)
25697 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25698 {
25699 if (really_return)
25700 emit_jump_insn (simple_return_rtx);
25701 return;
25702 }
25703
25704 /* If we are throwing an exception, then we really must be doing a
25705 return, so we can't tail-call. */
25706 gcc_assert (!crtl->calls_eh_return || really_return);
25707
25708 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25709 {
25710 arm_expand_epilogue_apcs_frame (really_return);
25711 return;
25712 }
25713
25714 /* Get frame offsets for ARM. */
25715 offsets = arm_get_frame_offsets ();
25716 saved_regs_mask = offsets->saved_regs_mask;
25717 num_regs = bit_count (saved_regs_mask);
25718
25719 if (frame_pointer_needed)
25720 {
25721 rtx_insn *insn;
25722 /* Restore stack pointer if necessary. */
25723 if (TARGET_ARM)
25724 {
25725 	  /* In ARM mode, the frame pointer points to the first saved register;
25726 	     restore the stack pointer to the last saved register.  */
25727 amount = offsets->frame - offsets->saved_regs;
25728
25729 /* Force out any pending memory operations that reference stacked data
25730 before stack de-allocation occurs. */
25731 emit_insn (gen_blockage ());
25732 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25733 hard_frame_pointer_rtx,
25734 GEN_INT (amount)));
25735 arm_add_cfa_adjust_cfa_note (insn, amount,
25736 stack_pointer_rtx,
25737 hard_frame_pointer_rtx);
25738
25739 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25740 deleted. */
25741 emit_insn (gen_force_register_use (stack_pointer_rtx));
25742 }
25743 else
25744 {
25745 /* In Thumb-2 mode, the frame pointer points to the last saved
25746 register. */
25747 amount = offsets->locals_base - offsets->saved_regs;
25748 if (amount)
25749 {
25750 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25751 hard_frame_pointer_rtx,
25752 GEN_INT (amount)));
25753 arm_add_cfa_adjust_cfa_note (insn, amount,
25754 hard_frame_pointer_rtx,
25755 hard_frame_pointer_rtx);
25756 }
25757
25758 /* Force out any pending memory operations that reference stacked data
25759 before stack de-allocation occurs. */
25760 emit_insn (gen_blockage ());
25761 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25762 hard_frame_pointer_rtx));
25763 arm_add_cfa_adjust_cfa_note (insn, 0,
25764 stack_pointer_rtx,
25765 hard_frame_pointer_rtx);
25766 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25767 deleted. */
25768 emit_insn (gen_force_register_use (stack_pointer_rtx));
25769 }
25770 }
25771 else
25772 {
25773 /* Pop off outgoing args and local frame to adjust stack pointer to
25774 last saved register. */
25775 amount = offsets->outgoing_args - offsets->saved_regs;
25776 if (amount)
25777 {
25778 rtx_insn *tmp;
25779 /* Force out any pending memory operations that reference stacked data
25780 before stack de-allocation occurs. */
25781 emit_insn (gen_blockage ());
25782 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25783 stack_pointer_rtx,
25784 GEN_INT (amount)));
25785 arm_add_cfa_adjust_cfa_note (tmp, amount,
25786 stack_pointer_rtx, stack_pointer_rtx);
25787 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25788 not deleted. */
25789 emit_insn (gen_force_register_use (stack_pointer_rtx));
25790 }
25791 }
25792
25793 if (TARGET_HARD_FLOAT)
25794 {
25795 /* Generate VFP register multi-pop. */
25796 int end_reg = LAST_VFP_REGNUM + 1;
25797
25798 /* Scan the registers in reverse order. We need to match
25799 any groupings made in the prologue and generate matching
25800 	 vldm operations.  Matching the groups is necessary because,
25801 	 unlike POP, VLDM can only restore consecutive registers.  */
25802 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25803 /* Look for a case where a reg does not need restoring. */
25804 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25805 && (!df_regs_ever_live_p (i + 1)
25806 || call_used_regs[i + 1]))
25807 {
25808 /* Restore the regs discovered so far (from reg+2 to
25809 end_reg). */
25810 if (end_reg > i + 2)
25811 arm_emit_vfp_multi_reg_pop (i + 2,
25812 (end_reg - (i + 2)) / 2,
25813 stack_pointer_rtx);
25814 end_reg = i;
25815 }
25816
25817 /* Restore the remaining regs that we have discovered (or possibly
25818 even all of them, if the conditional in the for loop never
25819 fired). */
25820 if (end_reg > i + 2)
25821 arm_emit_vfp_multi_reg_pop (i + 2,
25822 (end_reg - (i + 2)) / 2,
25823 stack_pointer_rtx);
25824 }
25825
25826 if (TARGET_IWMMXT)
25827 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25828 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25829 {
25830 rtx_insn *insn;
25831 rtx addr = gen_rtx_MEM (V2SImode,
25832 gen_rtx_POST_INC (SImode,
25833 stack_pointer_rtx));
25834 set_mem_alias_set (addr, get_frame_alias_set ());
25835 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25836 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25837 gen_rtx_REG (V2SImode, i),
25838 NULL_RTX);
25839 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25840 stack_pointer_rtx, stack_pointer_rtx);
25841 }
25842
25843 if (saved_regs_mask)
25844 {
25845 rtx insn;
25846 bool return_in_pc = false;
25847
25848 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25849 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25850 && !IS_CMSE_ENTRY (func_type)
25851 && !IS_STACKALIGN (func_type)
25852 && really_return
25853 && crtl->args.pretend_args_size == 0
25854 && saved_regs_mask & (1 << LR_REGNUM)
25855 && !crtl->calls_eh_return)
25856 {
25857 saved_regs_mask &= ~(1 << LR_REGNUM);
25858 saved_regs_mask |= (1 << PC_REGNUM);
25859 return_in_pc = true;
25860 }
25861
25862 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25863 {
25864 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25865 if (saved_regs_mask & (1 << i))
25866 {
25867 rtx addr = gen_rtx_MEM (SImode,
25868 gen_rtx_POST_INC (SImode,
25869 stack_pointer_rtx));
25870 set_mem_alias_set (addr, get_frame_alias_set ());
25871
25872 if (i == PC_REGNUM)
25873 {
25874 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25875 XVECEXP (insn, 0, 0) = ret_rtx;
25876 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25877 addr);
25878 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25879 insn = emit_jump_insn (insn);
25880 }
25881 else
25882 {
25883 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25884 addr));
25885 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25886 gen_rtx_REG (SImode, i),
25887 NULL_RTX);
25888 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25889 stack_pointer_rtx,
25890 stack_pointer_rtx);
25891 }
25892 }
25893 }
25894 else
25895 {
25896 if (TARGET_LDRD
25897 && current_tune->prefer_ldrd_strd
25898 && !optimize_function_for_size_p (cfun))
25899 {
25900 if (TARGET_THUMB2)
25901 thumb2_emit_ldrd_pop (saved_regs_mask);
25902 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25903 arm_emit_ldrd_pop (saved_regs_mask);
25904 else
25905 arm_emit_multi_reg_pop (saved_regs_mask);
25906 }
25907 else
25908 arm_emit_multi_reg_pop (saved_regs_mask);
25909 }
25910
25911 if (return_in_pc)
25912 return;
25913 }
25914
25915 amount
25916 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25917 if (amount)
25918 {
25919 int i, j;
25920 rtx dwarf = NULL_RTX;
25921 rtx_insn *tmp =
25922 emit_insn (gen_addsi3 (stack_pointer_rtx,
25923 stack_pointer_rtx,
25924 GEN_INT (amount)));
25925
25926 RTX_FRAME_RELATED_P (tmp) = 1;
25927
25928 if (cfun->machine->uses_anonymous_args)
25929 {
25930 	  /* Restore the pretend args.  See arm_expand_prologue for how the
25931 	     pretend args are saved on the stack.  */
25932 int num_regs = crtl->args.pretend_args_size / 4;
25933 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
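	  /* For example, pretend_args_size == 8 gives num_regs == 2 and
	     saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc, i.e. {r2, r3}, the
	     same registers the prologue pushed for the anonymous args.  */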
25934 for (j = 0, i = 0; j < num_regs; i++)
25935 if (saved_regs_mask & (1 << i))
25936 {
25937 rtx reg = gen_rtx_REG (SImode, i);
25938 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25939 j++;
25940 }
25941 REG_NOTES (tmp) = dwarf;
25942 }
25943 arm_add_cfa_adjust_cfa_note (tmp, amount,
25944 stack_pointer_rtx, stack_pointer_rtx);
25945 }
25946
25947 /* Clear all caller-saved regs that are not used to return. */
25948 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25949 {
25950 /* CMSE_ENTRY always returns. */
25951 gcc_assert (really_return);
25952 cmse_nonsecure_entry_clear_before_return ();
25953 }
25954
25955 if (!really_return)
25956 return;
25957
25958 if (crtl->calls_eh_return)
25959 emit_insn (gen_addsi3 (stack_pointer_rtx,
25960 stack_pointer_rtx,
25961 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25962
25963 if (IS_STACKALIGN (func_type))
25964 /* Restore the original stack pointer. Before prologue, the stack was
25965 realigned and the original stack pointer saved in r0. For details,
25966 see comment in arm_expand_prologue. */
25967 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25968
25969 emit_jump_insn (simple_return_rtx);
25970 }
25971
25972 /* Implementation of insn prologue_thumb1_interwork. This is the first
25973    "instruction" of a function called in ARM mode.  Switch to Thumb mode.  */
25974
25975 const char *
25976 thumb1_output_interwork (void)
25977 {
25978 const char * name;
25979 FILE *f = asm_out_file;
25980
25981 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25982 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25983 == SYMBOL_REF);
25984 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25985
25986 /* Generate code sequence to switch us into Thumb mode. */
25987 /* The .code 32 directive has already been emitted by
25988 ASM_DECLARE_FUNCTION_NAME. */
25989 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25990 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
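  /* Executed as ARM code, the two instructions above compute the address of
     the Thumb code that follows (PC reads as the orr's address + 8), set the
     Thumb bit, and branch there via BX.  */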
25991
25992 /* Generate a label, so that the debugger will notice the
25993 change in instruction sets. This label is also used by
25994 the assembler to bypass the ARM code when this function
25995 is called from a Thumb encoded function elsewhere in the
25996 same file. Hence the definition of STUB_NAME here must
25997 agree with the definition in gas/config/tc-arm.c. */
25998
25999 #define STUB_NAME ".real_start_of"
26000
26001 fprintf (f, "\t.code\t16\n");
26002 #ifdef ARM_PE
26003 if (arm_dllexport_name_p (name))
26004 name = arm_strip_name_encoding (name);
26005 #endif
26006 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26007 fprintf (f, "\t.thumb_func\n");
26008 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26009
26010 return "";
26011 }
26012
26013 /* Handle the case of a double word load into a low register from
26014 a computed memory address. The computed address may involve a
26015 register which is overwritten by the load. */
26016 const char *
26017 thumb_load_double_from_address (rtx *operands)
26018 {
26019 rtx addr;
26020 rtx base;
26021 rtx offset;
26022 rtx arg1;
26023 rtx arg2;
26024
26025 gcc_assert (REG_P (operands[0]));
26026 gcc_assert (MEM_P (operands[1]));
26027
26028 /* Get the memory address. */
26029 addr = XEXP (operands[1], 0);
26030
26031 /* Work out how the memory address is computed. */
26032 switch (GET_CODE (addr))
26033 {
26034 case REG:
26035 operands[2] = adjust_address (operands[1], SImode, 4);
26036
26037 if (REGNO (operands[0]) == REGNO (addr))
26038 {
26039 output_asm_insn ("ldr\t%H0, %2", operands);
26040 output_asm_insn ("ldr\t%0, %1", operands);
26041 }
26042 else
26043 {
26044 output_asm_insn ("ldr\t%0, %1", operands);
26045 output_asm_insn ("ldr\t%H0, %2", operands);
26046 }
26047 break;
26048
26049 case CONST:
26050 /* Compute <address> + 4 for the high order load. */
26051 operands[2] = adjust_address (operands[1], SImode, 4);
26052
26053 output_asm_insn ("ldr\t%0, %1", operands);
26054 output_asm_insn ("ldr\t%H0, %2", operands);
26055 break;
26056
26057 case PLUS:
26058 arg1 = XEXP (addr, 0);
26059 arg2 = XEXP (addr, 1);
26060
26061 if (CONSTANT_P (arg1))
26062 base = arg2, offset = arg1;
26063 else
26064 base = arg1, offset = arg2;
26065
26066 gcc_assert (REG_P (base));
26067
26068 /* Catch the case of <address> = <reg> + <reg> */
26069 if (REG_P (offset))
26070 {
26071 int reg_offset = REGNO (offset);
26072 int reg_base = REGNO (base);
26073 int reg_dest = REGNO (operands[0]);
26074
26075 /* Add the base and offset registers together into the
26076 higher destination register. */
26077 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26078 reg_dest + 1, reg_base, reg_offset);
26079
26080 /* Load the lower destination register from the address in
26081 the higher destination register. */
26082 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26083 reg_dest, reg_dest + 1);
26084
26085 /* Load the higher destination register from its own address
26086 plus 4. */
26087 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26088 reg_dest + 1, reg_dest + 1);
26089 }
26090 else
26091 {
26092 /* Compute <address> + 4 for the high order load. */
26093 operands[2] = adjust_address (operands[1], SImode, 4);
26094
26095 /* If the computed address is held in the low order register
26096 then load the high order register first, otherwise always
26097 load the low order register first. */
26098 if (REGNO (operands[0]) == REGNO (base))
26099 {
26100 output_asm_insn ("ldr\t%H0, %2", operands);
26101 output_asm_insn ("ldr\t%0, %1", operands);
26102 }
26103 else
26104 {
26105 output_asm_insn ("ldr\t%0, %1", operands);
26106 output_asm_insn ("ldr\t%H0, %2", operands);
26107 }
26108 }
26109 break;
26110
26111 case LABEL_REF:
26112 /* With no registers to worry about we can just load the value
26113 directly. */
26114 operands[2] = adjust_address (operands[1], SImode, 4);
26115
26116 output_asm_insn ("ldr\t%H0, %2", operands);
26117 output_asm_insn ("ldr\t%0, %1", operands);
26118 break;
26119
26120 default:
26121 gcc_unreachable ();
26122 }
26123
26124 return "";
26125 }
26126
26127 const char *
26128 thumb_output_move_mem_multiple (int n, rtx *operands)
26129 {
26130 switch (n)
26131 {
26132 case 2:
26133 if (REGNO (operands[4]) > REGNO (operands[5]))
26134 std::swap (operands[4], operands[5]);
26135
26136 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26137 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26138 break;
26139
26140 case 3:
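      /* Sort the three scratch registers into ascending order first, so that
	 the register lists printed below are in ascending register number;
	 assemblers typically warn about out-of-order LDM/STM lists.  */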
26141 if (REGNO (operands[4]) > REGNO (operands[5]))
26142 std::swap (operands[4], operands[5]);
26143 if (REGNO (operands[5]) > REGNO (operands[6]))
26144 std::swap (operands[5], operands[6]);
26145 if (REGNO (operands[4]) > REGNO (operands[5]))
26146 std::swap (operands[4], operands[5]);
26147
26148 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26149 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26150 break;
26151
26152 default:
26153 gcc_unreachable ();
26154 }
26155
26156 return "";
26157 }
26158
26159 /* Output a call-via instruction for thumb state. */
26160 const char *
26161 thumb_call_via_reg (rtx reg)
26162 {
26163 int regno = REGNO (reg);
26164 rtx *labelp;
26165
26166 gcc_assert (regno < LR_REGNUM);
26167
26168 /* If we are in the normal text section we can use a single instance
26169 per compilation unit. If we are doing function sections, then we need
26170 an entry per section, since we can't rely on reachability. */
26171 if (in_section == text_section)
26172 {
26173 thumb_call_reg_needed = 1;
26174
26175 if (thumb_call_via_label[regno] == NULL)
26176 thumb_call_via_label[regno] = gen_label_rtx ();
26177 labelp = thumb_call_via_label + regno;
26178 }
26179 else
26180 {
26181 if (cfun->machine->call_via[regno] == NULL)
26182 cfun->machine->call_via[regno] = gen_label_rtx ();
26183 labelp = cfun->machine->call_via + regno;
26184 }
26185
26186 output_asm_insn ("bl\t%a0", labelp);
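  /* The matching "<label>: bx rN" stub is emitted later; see arm_file_end
     below for the text-section case.  This gives Thumb-1 an indirect call
     through rN without needing BLX.  */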
26187 return "";
26188 }
26189
26190 /* Routines for generating rtl. */
26191 void
26192 thumb_expand_movmemqi (rtx *operands)
26193 {
26194 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26195 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26196 HOST_WIDE_INT len = INTVAL (operands[2]);
26197 HOST_WIDE_INT offset = 0;
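  /* For example, a 23-byte copy is emitted as one 12-byte block move, one
     8-byte block move, then a halfword and a byte copy for the 3-byte tail.  */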
26198
26199 while (len >= 12)
26200 {
26201 emit_insn (gen_movmem12b (out, in, out, in));
26202 len -= 12;
26203 }
26204
26205 if (len >= 8)
26206 {
26207 emit_insn (gen_movmem8b (out, in, out, in));
26208 len -= 8;
26209 }
26210
26211 if (len >= 4)
26212 {
26213 rtx reg = gen_reg_rtx (SImode);
26214 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26215 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26216 len -= 4;
26217 offset += 4;
26218 }
26219
26220 if (len >= 2)
26221 {
26222 rtx reg = gen_reg_rtx (HImode);
26223 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26224 plus_constant (Pmode, in,
26225 offset))));
26226 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26227 offset)),
26228 reg));
26229 len -= 2;
26230 offset += 2;
26231 }
26232
26233 if (len)
26234 {
26235 rtx reg = gen_reg_rtx (QImode);
26236 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26237 plus_constant (Pmode, in,
26238 offset))));
26239 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26240 offset)),
26241 reg));
26242 }
26243 }
26244
26245 void
26246 thumb_reload_out_hi (rtx *operands)
26247 {
26248 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26249 }
26250
26251 /* Return the length of a function name prefix
26252    that starts with the character C.  */
26253 static int
26254 arm_get_strip_length (int c)
26255 {
26256 switch (c)
26257 {
26258 ARM_NAME_ENCODING_LENGTHS
26259 default: return 0;
26260 }
26261 }
26262
26263 /* Return a pointer to a function's name with any
26264 and all prefix encodings stripped from it. */
26265 const char *
26266 arm_strip_name_encoding (const char *name)
26267 {
26268 int skip;
26269
26270 while ((skip = arm_get_strip_length (* name)))
26271 name += skip;
26272
26273 return name;
26274 }
26275
26276 /* If there is a '*' anywhere in the name's prefix, then
26277 emit the stripped name verbatim, otherwise prepend an
26278 underscore if leading underscores are being used. */
26279 void
26280 arm_asm_output_labelref (FILE *stream, const char *name)
26281 {
26282 int skip;
26283 int verbatim = 0;
26284
26285 while ((skip = arm_get_strip_length (* name)))
26286 {
26287 verbatim |= (*name == '*');
26288 name += skip;
26289 }
26290
26291 if (verbatim)
26292 fputs (name, stream);
26293 else
26294 asm_fprintf (stream, "%U%s", name);
26295 }
26296
26297 /* This function is used to emit an EABI tag and its associated value.
26298 We emit the numerical value of the tag in case the assembler does not
26299    support textual tags (e.g. gas prior to 2.20).  If requested we include
26300 the tag name in a comment so that anyone reading the assembler output
26301 will know which tag is being set.
26302
26303 This function is not static because arm-c.c needs it too. */
26304
26305 void
26306 arm_emit_eabi_attribute (const char *name, int num, int val)
26307 {
26308 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26309 if (flag_verbose_asm || flag_debug_asm)
26310 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26311 asm_fprintf (asm_out_file, "\n");
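  /* For instance, with -fverbose-asm the optimization-goals attribute at -O2
     comes out roughly as:
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */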
26312 }
26313
26314 /* This function is used to print CPU tuning information as comment
26315 in assembler file. Pointers are not printed for now. */
26316
26317 void
26318 arm_print_tune_info (void)
26319 {
26320 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26321 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26322 current_tune->constant_limit);
26323 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26324 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26325 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26326 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26328 "prefetch.l1_cache_size:\t%d\n",
26329 current_tune->prefetch.l1_cache_size);
26330 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26331 "prefetch.l1_cache_line_size:\t%d\n",
26332 current_tune->prefetch.l1_cache_line_size);
26333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26334 "prefer_constant_pool:\t%d\n",
26335 (int) current_tune->prefer_constant_pool);
26336 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26337 "branch_cost:\t(s:speed, p:predictable)\n");
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26339 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26340 current_tune->branch_cost (false, false));
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26342 current_tune->branch_cost (false, true));
26343 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26344 current_tune->branch_cost (true, false));
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26346 current_tune->branch_cost (true, true));
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26348 "prefer_ldrd_strd:\t%d\n",
26349 (int) current_tune->prefer_ldrd_strd);
26350 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26351 "logical_op_non_short_circuit:\t[%d,%d]\n",
26352 (int) current_tune->logical_op_non_short_circuit_thumb,
26353 (int) current_tune->logical_op_non_short_circuit_arm);
26354 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26355 "prefer_neon_for_64bits:\t%d\n",
26356 (int) current_tune->prefer_neon_for_64bits);
26357 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26358 "disparage_flag_setting_t16_encodings:\t%d\n",
26359 (int) current_tune->disparage_flag_setting_t16_encodings);
26360 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26361 "string_ops_prefer_neon:\t%d\n",
26362 (int) current_tune->string_ops_prefer_neon);
26363 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26364 "max_insns_inline_memset:\t%d\n",
26365 current_tune->max_insns_inline_memset);
26366 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26367 current_tune->fusible_ops);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26369 (int) current_tune->sched_autopref);
26370 }
26371
26372 /* Print .arch and .arch_extension directives corresponding to the
26373 current architecture configuration. */
26374 static void
26375 arm_print_asm_arch_directives ()
26376 {
26377 const arch_option *arch
26378 = arm_parse_arch_option_name (all_architectures, "-march",
26379 arm_active_target.arch_name);
26380 auto_sbitmap opt_bits (isa_num_bits);
26381
26382 gcc_assert (arch);
26383
26384 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26385 arm_last_printed_arch_string = arm_active_target.arch_name;
26386 if (!arch->common.extensions)
26387 return;
26388
26389 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26390 opt->name != NULL;
26391 opt++)
26392 {
26393 if (!opt->remove)
26394 {
26395 arm_initialize_isa (opt_bits, opt->isa_bits);
26396
26397 /* If every feature bit of this option is set in the target
26398 ISA specification, print out the option name. However,
26399 don't print anything if all the bits are part of the
26400 FPU specification. */
26401 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26402 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26403 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26404 }
26405 }
26406 }
26407
26408 static void
26409 arm_file_start (void)
26410 {
26411 int val;
26412
26413 if (TARGET_BPABI)
26414 {
26415 /* We don't have a specified CPU. Use the architecture to
26416 generate the tags.
26417
26418 Note: it might be better to do this unconditionally, then the
26419 assembler would not need to know about all new CPU names as
26420 they are added. */
26421 if (!arm_active_target.core_name)
26422 {
26423 /* armv7ve doesn't support any extensions. */
26424 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26425 {
26426 		  /* Keep backward compatibility for assemblers
26427 which don't support armv7ve. */
26428 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26429 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26430 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26431 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26432 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26433 arm_last_printed_arch_string = "armv7ve";
26434 }
26435 else
26436 arm_print_asm_arch_directives ();
26437 }
26438 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26439 {
26440 asm_fprintf (asm_out_file, "\t.arch %s\n",
26441 arm_active_target.core_name + 8);
26442 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26443 }
26444 else
26445 {
26446 const char* truncated_name
26447 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26448 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26449 }
26450
26451 if (print_tune_info)
26452 arm_print_tune_info ();
26453
26454 if (! TARGET_SOFT_FLOAT)
26455 {
26456 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26457 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26458
26459 if (TARGET_HARD_FLOAT_ABI)
26460 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26461 }
26462
26463 /* Some of these attributes only apply when the corresponding features
26464 are used. However we don't have any easy way of figuring this out.
26465 Conservatively record the setting that would have been used. */
26466
26467 if (flag_rounding_math)
26468 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26469
26470 if (!flag_unsafe_math_optimizations)
26471 {
26472 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26473 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26474 }
26475 if (flag_signaling_nans)
26476 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26477
26478 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26479 flag_finite_math_only ? 1 : 3);
26480
26481 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26482 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26483 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26484 flag_short_enums ? 1 : 2);
26485
26486 /* Tag_ABI_optimization_goals. */
26487 if (optimize_size)
26488 val = 4;
26489 else if (optimize >= 2)
26490 val = 2;
26491 else if (optimize)
26492 val = 1;
26493 else
26494 val = 6;
26495 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26496
26497 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26498 unaligned_access);
26499
26500 if (arm_fp16_format)
26501 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26502 (int) arm_fp16_format);
26503
26504 if (arm_lang_output_object_attributes_hook)
26505 arm_lang_output_object_attributes_hook();
26506 }
26507
26508 default_file_start ();
26509 }
26510
26511 static void
26512 arm_file_end (void)
26513 {
26514 int regno;
26515
26516 if (NEED_INDICATE_EXEC_STACK)
26517 /* Add .note.GNU-stack. */
26518 file_end_indicate_exec_stack ();
26519
26520 if (! thumb_call_reg_needed)
26521 return;
26522
26523 switch_to_section (text_section);
26524 asm_fprintf (asm_out_file, "\t.code 16\n");
26525 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26526
26527 for (regno = 0; regno < LR_REGNUM; regno++)
26528 {
26529 rtx label = thumb_call_via_label[regno];
26530
26531 if (label != 0)
26532 {
26533 targetm.asm_out.internal_label (asm_out_file, "L",
26534 CODE_LABEL_NUMBER (label));
26535 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26536 }
26537 }
26538 }
26539
26540 #ifndef ARM_PE
26541 /* Symbols in the text segment can be accessed without indirecting via the
26542 constant pool; it may take an extra binary operation, but this is still
26543 faster than indirecting via memory. Don't do this when not optimizing,
26544    since we won't be calculating all of the offsets necessary to do this
26545 simplification. */
26546
26547 static void
26548 arm_encode_section_info (tree decl, rtx rtl, int first)
26549 {
26550 if (optimize > 0 && TREE_CONSTANT (decl))
26551 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26552
26553 default_encode_section_info (decl, rtl, first);
26554 }
26555 #endif /* !ARM_PE */
26556
26557 static void
26558 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26559 {
26560 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26561 && !strcmp (prefix, "L"))
26562 {
26563 arm_ccfsm_state = 0;
26564 arm_target_insn = NULL;
26565 }
26566 default_internal_label (stream, prefix, labelno);
26567 }
26568
26569 /* Output code to add DELTA to the first argument, and then jump
26570 to FUNCTION. Used for C++ multiple inheritance. */
26571
26572 static void
26573 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26574 HOST_WIDE_INT, tree function)
26575 {
26576 static int thunk_label = 0;
26577 char label[256];
26578 char labelpc[256];
26579 int mi_delta = delta;
26580 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26581 int shift = 0;
26582 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26583 ? 1 : 0);
26584 if (mi_delta < 0)
26585 mi_delta = - mi_delta;
26586
26587 final_start_function (emit_barrier (), file, 1);
26588
26589 if (TARGET_THUMB1)
26590 {
26591 int labelno = thunk_label++;
26592 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26593       /* Thunks are entered in ARM mode when available.  */
26594 if (TARGET_THUMB1_ONLY)
26595 {
26596 /* push r3 so we can use it as a temporary. */
26597 /* TODO: Omit this save if r3 is not used. */
26598 fputs ("\tpush {r3}\n", file);
26599 fputs ("\tldr\tr3, ", file);
26600 }
26601 else
26602 {
26603 fputs ("\tldr\tr12, ", file);
26604 }
26605 assemble_name (file, label);
26606 fputc ('\n', file);
26607 if (flag_pic)
26608 {
26609 /* If we are generating PIC, the ldr instruction below loads
26610 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26611 the address of the add + 8, so we have:
26612
26613 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26614 = target + 1.
26615
26616 Note that we have "+ 1" because some versions of GNU ld
26617 don't set the low bit of the result for R_ARM_REL32
26618 relocations against thumb function symbols.
26619 On ARMv6M this is +4, not +8. */
26620 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26621 assemble_name (file, labelpc);
26622 fputs (":\n", file);
26623 if (TARGET_THUMB1_ONLY)
26624 {
26625 /* This is 2 insns after the start of the thunk, so we know it
26626 is 4-byte aligned. */
26627 fputs ("\tadd\tr3, pc, r3\n", file);
26628 fputs ("\tmov r12, r3\n", file);
26629 }
26630 else
26631 fputs ("\tadd\tr12, pc, r12\n", file);
26632 }
26633 else if (TARGET_THUMB1_ONLY)
26634 fputs ("\tmov r12, r3\n", file);
26635 }
26636 if (TARGET_THUMB1_ONLY)
26637 {
26638 if (mi_delta > 255)
26639 {
26640 fputs ("\tldr\tr3, ", file);
26641 assemble_name (file, label);
26642 fputs ("+4\n", file);
26643 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26644 mi_op, this_regno, this_regno);
26645 }
26646 else if (mi_delta != 0)
26647 {
26648 /* Thumb1 unified syntax requires s suffix in instruction name when
26649 one of the operands is immediate. */
26650 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26651 mi_op, this_regno, this_regno,
26652 mi_delta);
26653 }
26654 }
26655 else
26656 {
26657 /* TODO: Use movw/movt for large constants when available. */
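      /* The loop below peels DELTA into chunks that each fit an ARM rotated
	 8-bit immediate; e.g. a delta of 0x1234 would be added roughly as
		add	rN, rN, #0x234
		add	rN, rN, #0x1000  */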
26658 while (mi_delta != 0)
26659 {
26660 if ((mi_delta & (3 << shift)) == 0)
26661 shift += 2;
26662 else
26663 {
26664 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26665 mi_op, this_regno, this_regno,
26666 mi_delta & (0xff << shift));
26667 mi_delta &= ~(0xff << shift);
26668 shift += 8;
26669 }
26670 }
26671 }
26672 if (TARGET_THUMB1)
26673 {
26674 if (TARGET_THUMB1_ONLY)
26675 fputs ("\tpop\t{r3}\n", file);
26676
26677 fprintf (file, "\tbx\tr12\n");
26678 ASM_OUTPUT_ALIGN (file, 2);
26679 assemble_name (file, label);
26680 fputs (":\n", file);
26681 if (flag_pic)
26682 {
26683 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26684 rtx tem = XEXP (DECL_RTL (function), 0);
26685 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26686 pipeline offset is four rather than eight. Adjust the offset
26687 accordingly. */
26688 tem = plus_constant (GET_MODE (tem), tem,
26689 TARGET_THUMB1_ONLY ? -3 : -7);
26690 tem = gen_rtx_MINUS (GET_MODE (tem),
26691 tem,
26692 gen_rtx_SYMBOL_REF (Pmode,
26693 ggc_strdup (labelpc)));
26694 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26695 }
26696 else
26697 /* Output ".word .LTHUNKn". */
26698 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26699
26700 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26701 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26702 }
26703 else
26704 {
26705 fputs ("\tb\t", file);
26706 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26707 if (NEED_PLT_RELOC)
26708 fputs ("(PLT)", file);
26709 fputc ('\n', file);
26710 }
26711
26712 final_end_function ();
26713 }
26714
26715 /* MI thunk handling for TARGET_32BIT. */
26716
26717 static void
26718 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26719 HOST_WIDE_INT vcall_offset, tree function)
26720 {
26721 /* On ARM, this_regno is R0 or R1 depending on
26722 whether the function returns an aggregate or not.
26723 */
26724 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26725 function)
26726 ? R1_REGNUM : R0_REGNUM);
26727
26728 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26729 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26730 reload_completed = 1;
26731 emit_note (NOTE_INSN_PROLOGUE_END);
26732
26733 /* Add DELTA to THIS_RTX. */
26734 if (delta != 0)
26735 arm_split_constant (PLUS, Pmode, NULL_RTX,
26736 delta, this_rtx, this_rtx, false);
26737
26738 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26739 if (vcall_offset != 0)
26740 {
26741 /* Load *THIS_RTX. */
26742 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26743 /* Compute *THIS_RTX + VCALL_OFFSET. */
26744 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26745 false);
26746 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26747 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26748 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26749 }
26750
26751 /* Generate a tail call to the target function. */
26752 if (!TREE_USED (function))
26753 {
26754 assemble_external (function);
26755 TREE_USED (function) = 1;
26756 }
26757 rtx funexp = XEXP (DECL_RTL (function), 0);
26758 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26759 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26760 SIBLING_CALL_P (insn) = 1;
26761
26762 insn = get_insns ();
26763 shorten_branches (insn);
26764 final_start_function (insn, file, 1);
26765 final (insn, file, 1);
26766 final_end_function ();
26767
26768 /* Stop pretending this is a post-reload pass. */
26769 reload_completed = 0;
26770 }
26771
26772 /* Output code to add DELTA to the first argument, and then jump
26773 to FUNCTION. Used for C++ multiple inheritance. */
26774
26775 static void
26776 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26777 HOST_WIDE_INT vcall_offset, tree function)
26778 {
26779 if (TARGET_32BIT)
26780 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26781 else
26782 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26783 }
26784
26785 int
26786 arm_emit_vector_const (FILE *file, rtx x)
26787 {
26788 int i;
26789 const char * pattern;
26790
26791 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26792
26793 switch (GET_MODE (x))
26794 {
26795 case E_V2SImode: pattern = "%08x"; break;
26796 case E_V4HImode: pattern = "%04x"; break;
26797 case E_V8QImode: pattern = "%02x"; break;
26798 default: gcc_unreachable ();
26799 }
26800
26801 fprintf (file, "0x");
26802 for (i = CONST_VECTOR_NUNITS (x); i--;)
26803 {
26804 rtx element;
26805
26806 element = CONST_VECTOR_ELT (x, i);
26807 fprintf (file, pattern, INTVAL (element));
26808 }
26809
26810 return 1;
26811 }
26812
26813 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26814 HFmode constant pool entries are actually loaded with ldr. */
26815 void
26816 arm_emit_fp16_const (rtx c)
26817 {
26818 long bits;
26819
26820 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26821 if (WORDS_BIG_ENDIAN)
26822 assemble_zeros (2);
26823 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26824 if (!WORDS_BIG_ENDIAN)
26825 assemble_zeros (2);
26826 }
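/* For example (illustrative): the HFmode constant 1.0 has the IEEE
   half-precision encoding 0x3c00, so on a little-endian target the pool
   entry emitted above is the two bytes 0x00 0x3c followed by two bytes
   of zero padding, giving a full word that ldr can load.  */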
26827
26828 const char *
26829 arm_output_load_gr (rtx *operands)
26830 {
26831 rtx reg;
26832 rtx offset;
26833 rtx wcgr;
26834 rtx sum;
26835
26836 if (!MEM_P (operands [1])
26837 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26838 || !REG_P (reg = XEXP (sum, 0))
26839 || !CONST_INT_P (offset = XEXP (sum, 1))
26840 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26841 return "wldrw%?\t%0, %1";
26842
26843 /* Fix up an out-of-range load of a GR register. */
26844 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26845 wcgr = operands[0];
26846 operands[0] = reg;
26847 output_asm_insn ("ldr%?\t%0, %1", operands);
26848
26849 operands[0] = wcgr;
26850 operands[1] = reg;
26851 output_asm_insn ("tmcr%?\t%0, %1", operands);
26852 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26853
26854 return "";
26855 }
26856
26857 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26858
26859 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26860 named arg and all anonymous args onto the stack.
26861 XXX I know the prologue shouldn't be pushing registers, but it is faster
26862 that way. */
26863
26864 static void
26865 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26866 machine_mode mode,
26867 tree type,
26868 int *pretend_size,
26869 int second_time ATTRIBUTE_UNUSED)
26870 {
26871 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26872 int nregs;
26873
26874 cfun->machine->uses_anonymous_args = 1;
26875 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26876 {
26877 nregs = pcum->aapcs_ncrn;
26878 if (nregs & 1)
26879 {
26880 int res = arm_needs_doubleword_align (mode, type);
26881 if (res < 0 && warn_psabi)
26882 inform (input_location, "parameter passing for argument of "
26883 "type %qT changed in GCC 7.1", type);
26884 else if (res > 0)
26885 nregs++;
26886 }
26887 }
26888 else
26889 nregs = pcum->nregs;
26890
26891 if (nregs < NUM_ARG_REGS)
26892 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26893 }
26894
26895 /* We can't rely on the caller doing the proper promotion when
26896 using APCS or ATPCS. */
26897
26898 static bool
26899 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26900 {
26901 return !TARGET_AAPCS_BASED;
26902 }
26903
26904 static machine_mode
26905 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26906 machine_mode mode,
26907 int *punsignedp ATTRIBUTE_UNUSED,
26908 const_tree fntype ATTRIBUTE_UNUSED,
26909 int for_return ATTRIBUTE_UNUSED)
26910 {
26911 if (GET_MODE_CLASS (mode) == MODE_INT
26912 && GET_MODE_SIZE (mode) < 4)
26913 return SImode;
26914
26915 return mode;
26916 }
26917
26918
26919 static bool
26920 arm_default_short_enums (void)
26921 {
26922 return ARM_DEFAULT_SHORT_ENUMS;
26923 }
26924
26925
26926 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26927
26928 static bool
26929 arm_align_anon_bitfield (void)
26930 {
26931 return TARGET_AAPCS_BASED;
26932 }
26933
26934
26935 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26936
26937 static tree
26938 arm_cxx_guard_type (void)
26939 {
26940 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26941 }
26942
26943
26944 /* The EABI says test the least significant bit of a guard variable. */
26945
26946 static bool
26947 arm_cxx_guard_mask_bit (void)
26948 {
26949 return TARGET_AAPCS_BASED;
26950 }
26951
26952
26953 /* The EABI specifies that all array cookies are 8 bytes long. */
26954
26955 static tree
26956 arm_get_cookie_size (tree type)
26957 {
26958 tree size;
26959
26960 if (!TARGET_AAPCS_BASED)
26961 return default_cxx_get_cookie_size (type);
26962
26963 size = build_int_cst (sizetype, 8);
26964 return size;
26965 }
26966
26967
26968 /* The EABI says that array cookies should also contain the element size. */
26969
26970 static bool
26971 arm_cookie_has_size (void)
26972 {
26973 return TARGET_AAPCS_BASED;
26974 }
26975
26976
26977 /* The EABI says constructors and destructors should return a pointer to
26978 the object constructed/destroyed. */
26979
26980 static bool
26981 arm_cxx_cdtor_returns_this (void)
26982 {
26983 return TARGET_AAPCS_BASED;
26984 }
26985
26986 /* The EABI says that an inline function may never be the key
26987 method. */
26988
26989 static bool
26990 arm_cxx_key_method_may_be_inline (void)
26991 {
26992 return !TARGET_AAPCS_BASED;
26993 }
26994
26995 static void
26996 arm_cxx_determine_class_data_visibility (tree decl)
26997 {
26998 if (!TARGET_AAPCS_BASED
26999 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27000 return;
27001
27002 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27003 is exported. However, on systems without dynamic vague linkage,
27004 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27005 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27006 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27007 else
27008 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27009 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27010 }
27011
27012 static bool
27013 arm_cxx_class_data_always_comdat (void)
27014 {
27015 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27016 vague linkage if the class has no key function. */
27017 return !TARGET_AAPCS_BASED;
27018 }
27019
27020
27021 /* The EABI says __aeabi_atexit should be used to register static
27022 destructors. */
27023
27024 static bool
27025 arm_cxx_use_aeabi_atexit (void)
27026 {
27027 return TARGET_AAPCS_BASED;
27028 }
27029
27030
27031 void
27032 arm_set_return_address (rtx source, rtx scratch)
27033 {
27034 arm_stack_offsets *offsets;
27035 HOST_WIDE_INT delta;
27036 rtx addr, mem;
27037 unsigned long saved_regs;
27038
27039 offsets = arm_get_frame_offsets ();
27040 saved_regs = offsets->saved_regs_mask;
27041
27042 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27043 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27044 else
27045 {
27046 if (frame_pointer_needed)
27047 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27048 else
27049 {
27050 /* LR will be the first saved register. */
27051 delta = offsets->outgoing_args - (offsets->frame + 4);
27052
27053
27054 if (delta >= 4096)
27055 {
27056 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27057 GEN_INT (delta & ~4095)));
27058 addr = scratch;
27059 delta &= 4095;
27060 }
27061 else
27062 addr = stack_pointer_rtx;
27063
27064 addr = plus_constant (Pmode, addr, delta);
27065 }
27066
27067 /* The store needs to be marked to prevent DSE from deleting
27068 it as dead if it is based on fp. */
27069 mem = gen_frame_mem (Pmode, addr);
27070 MEM_VOLATILE_P (mem) = true;
27071 emit_move_insn (mem, source);
27072 }
27073 }
27074
27075
27076 void
27077 thumb_set_return_address (rtx source, rtx scratch)
27078 {
27079 arm_stack_offsets *offsets;
27080 HOST_WIDE_INT delta;
27081 HOST_WIDE_INT limit;
27082 int reg;
27083 rtx addr, mem;
27084 unsigned long mask;
27085
27086 emit_use (source);
27087
27088 offsets = arm_get_frame_offsets ();
27089 mask = offsets->saved_regs_mask;
27090 if (mask & (1 << LR_REGNUM))
27091 {
27092 limit = 1024;
27093 /* Find the saved regs. */
27094 if (frame_pointer_needed)
27095 {
27096 delta = offsets->soft_frame - offsets->saved_args;
27097 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27098 if (TARGET_THUMB1)
27099 limit = 128;
27100 }
27101 else
27102 {
27103 delta = offsets->outgoing_args - offsets->saved_args;
27104 reg = SP_REGNUM;
27105 }
27106 /* Allow for the stack frame. */
27107 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27108 delta -= 16;
27109 /* The link register is always the first saved register. */
27110 delta -= 4;
27111
27112 /* Construct the address. */
27113 addr = gen_rtx_REG (SImode, reg);
27114 if (delta > limit)
27115 {
27116 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27117 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27118 addr = scratch;
27119 }
27120 else
27121 addr = plus_constant (Pmode, addr, delta);
27122
27123 /* The store needs to be marked to prevent DSE from deleting
27124 it as dead if it is based on fp. */
27125 mem = gen_frame_mem (Pmode, addr);
27126 MEM_VOLATILE_P (mem) = true;
27127 emit_move_insn (mem, source);
27128 }
27129 else
27130 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27131 }
27132
27133 /* Implements target hook vector_mode_supported_p. */
27134 bool
27135 arm_vector_mode_supported_p (machine_mode mode)
27136 {
27137 /* Neon also supports V2SImode, etc. listed in the clause below. */
27138 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27139 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27140 || mode == V2DImode || mode == V8HFmode))
27141 return true;
27142
27143 if ((TARGET_NEON || TARGET_IWMMXT)
27144 && ((mode == V2SImode)
27145 || (mode == V4HImode)
27146 || (mode == V8QImode)))
27147 return true;
27148
27149 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27150 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27151 || mode == V2HAmode))
27152 return true;
27153
27154 return false;
27155 }
27156
27157 /* Implements target hook array_mode_supported_p. */
27158
27159 static bool
27160 arm_array_mode_supported_p (machine_mode mode,
27161 unsigned HOST_WIDE_INT nelems)
27162 {
27163 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27164 for now, as the lane-swapping logic needs to be extended in the expanders.
27165 See PR target/82518. */
27166 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27167 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27168 && (nelems >= 2 && nelems <= 4))
27169 return true;
27170
27171 return false;
27172 }
27173
27174 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27175 registers when autovectorizing for Neon, at least until multiple vector
27176 widths are supported properly by the middle-end. */
27177
27178 static machine_mode
27179 arm_preferred_simd_mode (scalar_mode mode)
27180 {
27181 if (TARGET_NEON)
27182 switch (mode)
27183 {
27184 case E_SFmode:
27185 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27186 case E_SImode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27188 case E_HImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27190 case E_QImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27192 case E_DImode:
27193 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27194 return V2DImode;
27195 break;
27196
27197 default:;
27198 }
27199
27200 if (TARGET_REALLY_IWMMXT)
27201 switch (mode)
27202 {
27203 case E_SImode:
27204 return V2SImode;
27205 case E_HImode:
27206 return V4HImode;
27207 case E_QImode:
27208 return V8QImode;
27209
27210 default:;
27211 }
27212
27213 return word_mode;
27214 }
27215
27216 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27217
27218 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27219 using r0-r4 for function arguments, r7 for the stack frame, and not having
27220 enough left over to do doubleword arithmetic.  For Thumb-2 all the
27221 potentially problematic instructions accept high registers so this is not
27222 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27223 that require many low registers. */
27224 static bool
27225 arm_class_likely_spilled_p (reg_class_t rclass)
27226 {
27227 if ((TARGET_THUMB1 && rclass == LO_REGS)
27228 || rclass == CC_REG)
27229 return true;
27230
27231 return false;
27232 }
27233
27234 /* Implements target hook small_register_classes_for_mode_p. */
27235 bool
27236 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27237 {
27238 return TARGET_THUMB1;
27239 }
27240
27241 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27242 ARM insns and therefore guarantee that the shift count is modulo 256.
27243 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27244 guarantee no particular behavior for out-of-range counts. */
27245
27246 static unsigned HOST_WIDE_INT
27247 arm_shift_truncation_mask (machine_mode mode)
27248 {
27249 return mode == SImode ? 255 : 0;
27250 }
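/* Illustrative example: for SImode a register-specified shift such as
   "mov r0, r1, lsl r2" only uses the bottom byte of r2, so a count of
   260 behaves like a count of 4; the mask of 255 above lets the
   middle-end rely on that.  DImode shifts go through library calls or
   multi-insn expansions, so no truncation is guaranteed and the mask
   is 0.  */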
27251
27252
27253 /* Map internal gcc register numbers to DWARF2 register numbers. */
27254
27255 unsigned int
27256 arm_dbx_register_number (unsigned int regno)
27257 {
27258 if (regno < 16)
27259 return regno;
27260
27261 if (IS_VFP_REGNUM (regno))
27262 {
27263 /* See comment in arm_dwarf_register_span. */
27264 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27265 return 64 + regno - FIRST_VFP_REGNUM;
27266 else
27267 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27268 }
27269
27270 if (IS_IWMMXT_GR_REGNUM (regno))
27271 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27272
27273 if (IS_IWMMXT_REGNUM (regno))
27274 return 112 + regno - FIRST_IWMMXT_REGNUM;
27275
27276 return DWARF_FRAME_REGISTERS;
27277 }
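/* Worked examples (illustrative, assuming the usual register layout):
   r7 maps to DWARF register 7, s3 (a VFP register usable as a single)
   maps to 64 + 3 = 67, and d16 maps to 256 + 16 = 272.  Registers with
   no DWARF equivalent fall through to DWARF_FRAME_REGISTERS.  */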
27278
27279 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27280 GCC models them as 64 32-bit registers, so we need to describe this to
27281 the DWARF generation code. Other registers can use the default. */
27282 static rtx
27283 arm_dwarf_register_span (rtx rtl)
27284 {
27285 machine_mode mode;
27286 unsigned regno;
27287 rtx parts[16];
27288 int nregs;
27289 int i;
27290
27291 regno = REGNO (rtl);
27292 if (!IS_VFP_REGNUM (regno))
27293 return NULL_RTX;
27294
27295 /* XXX FIXME: The EABI defines two VFP register ranges:
27296 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27297 256-287: D0-D31
27298 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27299 corresponding D register. Until GDB supports this, we shall use the
27300 legacy encodings. We also use these encodings for D0-D15 for
27301 compatibility with older debuggers. */
27302 mode = GET_MODE (rtl);
27303 if (GET_MODE_SIZE (mode) < 8)
27304 return NULL_RTX;
27305
27306 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27307 {
27308 nregs = GET_MODE_SIZE (mode) / 4;
27309 for (i = 0; i < nregs; i += 2)
27310 if (TARGET_BIG_END)
27311 {
27312 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27313 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27314 }
27315 else
27316 {
27317 parts[i] = gen_rtx_REG (SImode, regno + i);
27318 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27319 }
27320 }
27321 else
27322 {
27323 nregs = GET_MODE_SIZE (mode) / 8;
27324 for (i = 0; i < nregs; i++)
27325 parts[i] = gen_rtx_REG (DImode, regno + i);
27326 }
27327
27328 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27329 }
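/* Example (illustrative): a DFmode value living in d0 is described to
   the DWARF code as the pair (s0, s1), that is, two SImode pieces using
   the legacy 64-95 numbering, whereas a DFmode value in d16, which has
   no single-precision aliases, is described as a single DImode piece
   using the 256-287 D-register numbering.  */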
27330
27331 #if ARM_UNWIND_INFO
27332 /* Emit unwind directives for a store-multiple instruction or stack pointer
27333 push during alignment.
27334 These should only ever be generated by the function prologue code, so
27335 expect them to have a particular form.
27336 The store-multiple instruction sometimes pushes pc as the last register,
27337 although it should not be tracked in the unwind information, or for -Os
27338 sometimes pushes some dummy registers before the first register that needs
27339 to be tracked in the unwind information; such dummy registers are there just
27340 to avoid a separate stack adjustment, and will not be restored in the
27341 epilogue. */
27342
27343 static void
27344 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27345 {
27346 int i;
27347 HOST_WIDE_INT offset;
27348 HOST_WIDE_INT nregs;
27349 int reg_size;
27350 unsigned reg;
27351 unsigned lastreg;
27352 unsigned padfirst = 0, padlast = 0;
27353 rtx e;
27354
27355 e = XVECEXP (p, 0, 0);
27356 gcc_assert (GET_CODE (e) == SET);
27357
27358 /* First insn will adjust the stack pointer. */
27359 gcc_assert (GET_CODE (e) == SET
27360 && REG_P (SET_DEST (e))
27361 && REGNO (SET_DEST (e)) == SP_REGNUM
27362 && GET_CODE (SET_SRC (e)) == PLUS);
27363
27364 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27365 nregs = XVECLEN (p, 0) - 1;
27366 gcc_assert (nregs);
27367
27368 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27369 if (reg < 16)
27370 {
27371 /* For -Os dummy registers can be pushed at the beginning to
27372 avoid separate stack pointer adjustment. */
27373 e = XVECEXP (p, 0, 1);
27374 e = XEXP (SET_DEST (e), 0);
27375 if (GET_CODE (e) == PLUS)
27376 padfirst = INTVAL (XEXP (e, 1));
27377 gcc_assert (padfirst == 0 || optimize_size);
27378 /* The function prologue may also push pc, but not annotate it as it is
27379 never restored. We turn this into a stack pointer adjustment. */
27380 e = XVECEXP (p, 0, nregs);
27381 e = XEXP (SET_DEST (e), 0);
27382 if (GET_CODE (e) == PLUS)
27383 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27384 else
27385 padlast = offset - 4;
27386 gcc_assert (padlast == 0 || padlast == 4);
27387 if (padlast == 4)
27388 fprintf (asm_out_file, "\t.pad #4\n");
27389 reg_size = 4;
27390 fprintf (asm_out_file, "\t.save {");
27391 }
27392 else if (IS_VFP_REGNUM (reg))
27393 {
27394 reg_size = 8;
27395 fprintf (asm_out_file, "\t.vsave {");
27396 }
27397 else
27398 /* Unknown register type. */
27399 gcc_unreachable ();
27400
27401 /* If the stack increment doesn't match the size of the saved registers,
27402 something has gone horribly wrong. */
27403 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27404
27405 offset = padfirst;
27406 lastreg = 0;
27407 /* The remaining insns will describe the stores. */
27408 for (i = 1; i <= nregs; i++)
27409 {
27410 /* Expect (set (mem <addr>) (reg)).
27411 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27412 e = XVECEXP (p, 0, i);
27413 gcc_assert (GET_CODE (e) == SET
27414 && MEM_P (SET_DEST (e))
27415 && REG_P (SET_SRC (e)));
27416
27417 reg = REGNO (SET_SRC (e));
27418 gcc_assert (reg >= lastreg);
27419
27420 if (i != 1)
27421 fprintf (asm_out_file, ", ");
27422 /* We can't use %r for vfp because we need to use the
27423 double precision register names. */
27424 if (IS_VFP_REGNUM (reg))
27425 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27426 else
27427 asm_fprintf (asm_out_file, "%r", reg);
27428
27429 if (flag_checking)
27430 {
27431 /* Check that the addresses are consecutive. */
27432 e = XEXP (SET_DEST (e), 0);
27433 if (GET_CODE (e) == PLUS)
27434 gcc_assert (REG_P (XEXP (e, 0))
27435 && REGNO (XEXP (e, 0)) == SP_REGNUM
27436 && CONST_INT_P (XEXP (e, 1))
27437 && offset == INTVAL (XEXP (e, 1)));
27438 else
27439 gcc_assert (i == 1
27440 && REG_P (e)
27441 && REGNO (e) == SP_REGNUM);
27442 offset += reg_size;
27443 }
27444 }
27445 fprintf (asm_out_file, "}\n");
27446 if (padfirst)
27447 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27448 }
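/* Example of the directives produced (illustrative): a prologue
   store-multiple that pushes {r4, r5, lr} results in
   "\t.save {r4, r5, lr}", while a vpush of {d8, d9} results in
   "\t.vsave {d8, d9}", with a preceding or trailing ".pad" directive
   when pc or dummy-register padding is involved.  */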
27449
27450 /* Emit unwind directives for a SET. */
27451
27452 static void
27453 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27454 {
27455 rtx e0;
27456 rtx e1;
27457 unsigned reg;
27458
27459 e0 = XEXP (p, 0);
27460 e1 = XEXP (p, 1);
27461 switch (GET_CODE (e0))
27462 {
27463 case MEM:
27464 /* Pushing a single register. */
27465 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27466 || !REG_P (XEXP (XEXP (e0, 0), 0))
27467 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27468 abort ();
27469
27470 asm_fprintf (asm_out_file, "\t.save ");
27471 if (IS_VFP_REGNUM (REGNO (e1)))
27472 asm_fprintf(asm_out_file, "{d%d}\n",
27473 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27474 else
27475 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27476 break;
27477
27478 case REG:
27479 if (REGNO (e0) == SP_REGNUM)
27480 {
27481 /* A stack increment. */
27482 if (GET_CODE (e1) != PLUS
27483 || !REG_P (XEXP (e1, 0))
27484 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27485 || !CONST_INT_P (XEXP (e1, 1)))
27486 abort ();
27487
27488 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27489 -INTVAL (XEXP (e1, 1)));
27490 }
27491 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27492 {
27493 HOST_WIDE_INT offset;
27494
27495 if (GET_CODE (e1) == PLUS)
27496 {
27497 if (!REG_P (XEXP (e1, 0))
27498 || !CONST_INT_P (XEXP (e1, 1)))
27499 abort ();
27500 reg = REGNO (XEXP (e1, 0));
27501 offset = INTVAL (XEXP (e1, 1));
27502 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27503 HARD_FRAME_POINTER_REGNUM, reg,
27504 offset);
27505 }
27506 else if (REG_P (e1))
27507 {
27508 reg = REGNO (e1);
27509 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27510 HARD_FRAME_POINTER_REGNUM, reg);
27511 }
27512 else
27513 abort ();
27514 }
27515 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27516 {
27517 /* Move from sp to reg. */
27518 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27519 }
27520 else if (GET_CODE (e1) == PLUS
27521 && REG_P (XEXP (e1, 0))
27522 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27523 && CONST_INT_P (XEXP (e1, 1)))
27524 {
27525 /* Set reg to offset from sp. */
27526 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27527 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27528 }
27529 else
27530 abort ();
27531 break;
27532
27533 default:
27534 abort ();
27535 }
27536 }
27537
27538
27539 /* Emit unwind directives for the given insn. */
27540
27541 static void
27542 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27543 {
27544 rtx note, pat;
27545 bool handled_one = false;
27546
27547 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27548 return;
27549
27550 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27551 && (TREE_NOTHROW (current_function_decl)
27552 || crtl->all_throwers_are_sibcalls))
27553 return;
27554
27555 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27556 return;
27557
27558 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27559 {
27560 switch (REG_NOTE_KIND (note))
27561 {
27562 case REG_FRAME_RELATED_EXPR:
27563 pat = XEXP (note, 0);
27564 goto found;
27565
27566 case REG_CFA_REGISTER:
27567 pat = XEXP (note, 0);
27568 if (pat == NULL)
27569 {
27570 pat = PATTERN (insn);
27571 if (GET_CODE (pat) == PARALLEL)
27572 pat = XVECEXP (pat, 0, 0);
27573 }
27574
27575 /* Only emitted for IS_STACKALIGN re-alignment. */
27576 {
27577 rtx dest, src;
27578 unsigned reg;
27579
27580 src = SET_SRC (pat);
27581 dest = SET_DEST (pat);
27582
27583 gcc_assert (src == stack_pointer_rtx);
27584 reg = REGNO (dest);
27585 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27586 reg + 0x90, reg);
27587 }
27588 handled_one = true;
27589 break;
27590
27591 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27592 to get correct dwarf information for shrink-wrapping.  We should not
27593 emit unwind information for it because such insns are used either for
27594 pretend arguments or to adjust sp and restore registers from the
27595 stack. */
27596 case REG_CFA_DEF_CFA:
27597 case REG_CFA_ADJUST_CFA:
27598 case REG_CFA_RESTORE:
27599 return;
27600
27601 case REG_CFA_EXPRESSION:
27602 case REG_CFA_OFFSET:
27603 /* ??? Only handling here what we actually emit. */
27604 gcc_unreachable ();
27605
27606 default:
27607 break;
27608 }
27609 }
27610 if (handled_one)
27611 return;
27612 pat = PATTERN (insn);
27613 found:
27614
27615 switch (GET_CODE (pat))
27616 {
27617 case SET:
27618 arm_unwind_emit_set (asm_out_file, pat);
27619 break;
27620
27621 case SEQUENCE:
27622 /* Store multiple. */
27623 arm_unwind_emit_sequence (asm_out_file, pat);
27624 break;
27625
27626 default:
27627 abort();
27628 }
27629 }
27630
27631
27632 /* Output a reference from a function exception table to the type_info
27633 object X. The EABI specifies that the symbol should be relocated by
27634 an R_ARM_TARGET2 relocation. */
27635
27636 static bool
27637 arm_output_ttype (rtx x)
27638 {
27639 fputs ("\t.word\t", asm_out_file);
27640 output_addr_const (asm_out_file, x);
27641 /* Use special relocations for symbol references. */
27642 if (!CONST_INT_P (x))
27643 fputs ("(TARGET2)", asm_out_file);
27644 fputc ('\n', asm_out_file);
27645
27646 return TRUE;
27647 }
27648
27649 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27650
27651 static void
27652 arm_asm_emit_except_personality (rtx personality)
27653 {
27654 fputs ("\t.personality\t", asm_out_file);
27655 output_addr_const (asm_out_file, personality);
27656 fputc ('\n', asm_out_file);
27657 }
27658 #endif /* ARM_UNWIND_INFO */
27659
27660 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27661
27662 static void
27663 arm_asm_init_sections (void)
27664 {
27665 #if ARM_UNWIND_INFO
27666 exception_section = get_unnamed_section (0, output_section_asm_op,
27667 "\t.handlerdata");
27668 #endif /* ARM_UNWIND_INFO */
27669
27670 #ifdef OBJECT_FORMAT_ELF
27671 if (target_pure_code)
27672 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27673 #endif
27674 }
27675
27676 /* Output unwind directives for the start/end of a function. */
27677
27678 void
27679 arm_output_fn_unwind (FILE * f, bool prologue)
27680 {
27681 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27682 return;
27683
27684 if (prologue)
27685 fputs ("\t.fnstart\n", f);
27686 else
27687 {
27688 /* If this function will never be unwound, then mark it as such.
27689 The same condition is used in arm_unwind_emit to suppress
27690 the frame annotations. */
27691 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27692 && (TREE_NOTHROW (current_function_decl)
27693 || crtl->all_throwers_are_sibcalls))
27694 fputs("\t.cantunwind\n", f);
27695
27696 fputs ("\t.fnend\n", f);
27697 }
27698 }
27699
27700 static bool
27701 arm_emit_tls_decoration (FILE *fp, rtx x)
27702 {
27703 enum tls_reloc reloc;
27704 rtx val;
27705
27706 val = XVECEXP (x, 0, 0);
27707 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27708
27709 output_addr_const (fp, val);
27710
27711 switch (reloc)
27712 {
27713 case TLS_GD32:
27714 fputs ("(tlsgd)", fp);
27715 break;
27716 case TLS_LDM32:
27717 fputs ("(tlsldm)", fp);
27718 break;
27719 case TLS_LDO32:
27720 fputs ("(tlsldo)", fp);
27721 break;
27722 case TLS_IE32:
27723 fputs ("(gottpoff)", fp);
27724 break;
27725 case TLS_LE32:
27726 fputs ("(tpoff)", fp);
27727 break;
27728 case TLS_DESCSEQ:
27729 fputs ("(tlsdesc)", fp);
27730 break;
27731 default:
27732 gcc_unreachable ();
27733 }
27734
27735 switch (reloc)
27736 {
27737 case TLS_GD32:
27738 case TLS_LDM32:
27739 case TLS_IE32:
27740 case TLS_DESCSEQ:
27741 fputs (" + (. - ", fp);
27742 output_addr_const (fp, XVECEXP (x, 0, 2));
27743 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27744 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27745 output_addr_const (fp, XVECEXP (x, 0, 3));
27746 fputc (')', fp);
27747 break;
27748 default:
27749 break;
27750 }
27751
27752 return TRUE;
27753 }
27754
27755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27756
27757 static void
27758 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27759 {
27760 gcc_assert (size == 4);
27761 fputs ("\t.word\t", file);
27762 output_addr_const (file, x);
27763 fputs ("(tlsldo)", file);
27764 }
27765
27766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27767
27768 static bool
27769 arm_output_addr_const_extra (FILE *fp, rtx x)
27770 {
27771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27772 return arm_emit_tls_decoration (fp, x);
27773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27774 {
27775 char label[256];
27776 int labelno = INTVAL (XVECEXP (x, 0, 0));
27777
27778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27779 assemble_name_raw (fp, label);
27780
27781 return TRUE;
27782 }
27783 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27784 {
27785 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27786 if (GOT_PCREL)
27787 fputs ("+.", fp);
27788 fputs ("-(", fp);
27789 output_addr_const (fp, XVECEXP (x, 0, 0));
27790 fputc (')', fp);
27791 return TRUE;
27792 }
27793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27794 {
27795 output_addr_const (fp, XVECEXP (x, 0, 0));
27796 if (GOT_PCREL)
27797 fputs ("+.", fp);
27798 fputs ("-(", fp);
27799 output_addr_const (fp, XVECEXP (x, 0, 1));
27800 fputc (')', fp);
27801 return TRUE;
27802 }
27803 else if (GET_CODE (x) == CONST_VECTOR)
27804 return arm_emit_vector_const (fp, x);
27805
27806 return FALSE;
27807 }
27808
27809 /* Output assembly for a shift instruction.
27810 SET_FLAGS determines how the instruction modifies the condition codes.
27811 0 - Do not set condition codes.
27812 1 - Set condition codes.
27813 2 - Use smallest instruction. */
27814 const char *
27815 arm_output_shift(rtx * operands, int set_flags)
27816 {
27817 char pattern[100];
27818 static const char flag_chars[3] = {'?', '.', '!'};
27819 const char *shift;
27820 HOST_WIDE_INT val;
27821 char c;
27822
27823 c = flag_chars[set_flags];
27824 shift = shift_op(operands[3], &val);
27825 if (shift)
27826 {
27827 if (val != -1)
27828 operands[2] = GEN_INT(val);
27829 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27830 }
27831 else
27832 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27833
27834 output_asm_insn (pattern, operands);
27835 return "";
27836 }
27837
27838 /* Output assembly for a WMMX immediate shift instruction. */
27839 const char *
27840 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27841 {
27842 int shift = INTVAL (operands[2]);
27843 char templ[50];
27844 machine_mode opmode = GET_MODE (operands[0]);
27845
27846 gcc_assert (shift >= 0);
27847
27848 /* Handle shift values that exceed the range of the register versions:
27849 greater than 63 for the D qualifier, 31 for the W qualifier or 15 for the H qualifier. */
27850 if (((opmode == V4HImode) && (shift > 15))
27851 || ((opmode == V2SImode) && (shift > 31))
27852 || ((opmode == DImode) && (shift > 63)))
27853 {
27854 if (wror_or_wsra)
27855 {
27856 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27857 output_asm_insn (templ, operands);
27858 if (opmode == DImode)
27859 {
27860 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27861 output_asm_insn (templ, operands);
27862 }
27863 }
27864 else
27865 {
27866 /* The destination register will contain all zeros. */
27867 sprintf (templ, "wzero\t%%0");
27868 output_asm_insn (templ, operands);
27869 }
27870 return "";
27871 }
27872
27873 if ((opmode == DImode) && (shift > 32))
27874 {
27875 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27876 output_asm_insn (templ, operands);
27877 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27878 output_asm_insn (templ, operands);
27879 }
27880 else
27881 {
27882 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27883 output_asm_insn (templ, operands);
27884 }
27885 return "";
27886 }
27887
27888 /* Output assembly for a WMMX tinsr instruction. */
27889 const char *
27890 arm_output_iwmmxt_tinsr (rtx *operands)
27891 {
27892 int mask = INTVAL (operands[3]);
27893 int i;
27894 char templ[50];
27895 int units = mode_nunits[GET_MODE (operands[0])];
27896 gcc_assert ((mask & (mask - 1)) == 0);
27897 for (i = 0; i < units; ++i)
27898 {
27899 if ((mask & 0x01) == 1)
27900 {
27901 break;
27902 }
27903 mask >>= 1;
27904 }
27905 gcc_assert (i < units);
27906 {
27907 switch (GET_MODE (operands[0]))
27908 {
27909 case E_V8QImode:
27910 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27911 break;
27912 case E_V4HImode:
27913 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27914 break;
27915 case E_V2SImode:
27916 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27917 break;
27918 default:
27919 gcc_unreachable ();
27920 break;
27921 }
27922 output_asm_insn (templ, operands);
27923 }
27924 return "";
27925 }
27926
27927 /* Output a Thumb-1 casesi dispatch sequence. */
27928 const char *
27929 thumb1_output_casesi (rtx *operands)
27930 {
27931 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27932
27933 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27934
27935 switch (GET_MODE(diff_vec))
27936 {
27937 case E_QImode:
27938 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27939 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27940 case E_HImode:
27941 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27942 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27943 case E_SImode:
27944 return "bl\t%___gnu_thumb1_case_si";
27945 default:
27946 gcc_unreachable ();
27947 }
27948 }
27949
27950 /* Output a Thumb-2 casesi instruction. */
27951 const char *
27952 thumb2_output_casesi (rtx *operands)
27953 {
27954 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27955
27956 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27957
27958 output_asm_insn ("cmp\t%0, %1", operands);
27959 output_asm_insn ("bhi\t%l3", operands);
27960 switch (GET_MODE(diff_vec))
27961 {
27962 case E_QImode:
27963 return "tbb\t[%|pc, %0]";
27964 case E_HImode:
27965 return "tbh\t[%|pc, %0, lsl #1]";
27966 case E_SImode:
27967 if (flag_pic)
27968 {
27969 output_asm_insn ("adr\t%4, %l2", operands);
27970 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27971 output_asm_insn ("add\t%4, %4, %5", operands);
27972 return "bx\t%4";
27973 }
27974 else
27975 {
27976 output_asm_insn ("adr\t%4, %l2", operands);
27977 return "ldr\t%|pc, [%4, %0, lsl #2]";
27978 }
27979 default:
27980 gcc_unreachable ();
27981 }
27982 }
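/* Illustrative only: for a QImode dispatch table the full sequence
   emitted above is of the form

       cmp     r0, #<max index>
       bhi     .Ldefault
       tbb     [pc, r0]

   with tbh and a one-bit left shift used for HImode tables, and an
   adr/ldr based sequence used for SImode tables.  */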
27983
27984 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27985 per-core tuning structs. */
27986 static int
27987 arm_issue_rate (void)
27988 {
27989 return current_tune->issue_rate;
27990 }
27991
27992 /* Return how many instructions the scheduler should look ahead to choose
27993 the best one. */
27994 static int
27995 arm_first_cycle_multipass_dfa_lookahead (void)
27996 {
27997 int issue_rate = arm_issue_rate ();
27998
27999 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28000 }
28001
28002 /* Enable modeling of L2 auto-prefetcher. */
28003 static int
28004 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28005 {
28006 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28007 }
28008
28009 const char *
28010 arm_mangle_type (const_tree type)
28011 {
28012 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28013 has to be mangled as if it is in the "std" namespace. */
28014 if (TARGET_AAPCS_BASED
28015 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28016 return "St9__va_list";
28017
28018 /* Half-precision float. */
28019 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28020 return "Dh";
28021
28022 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28023 builtin type. */
28024 if (TYPE_NAME (type) != NULL)
28025 return arm_mangle_builtin_type (type);
28026
28027 /* Use the default mangling. */
28028 return NULL;
28029 }
28030
28031 /* Order of allocation of core registers for Thumb: this allocation is
28032 written over the corresponding initial entries of the array
28033 initialized with REG_ALLOC_ORDER. We allocate all low registers
28034 first. Saving and restoring a low register is usually cheaper than
28035 using a call-clobbered high register. */
28036
28037 static const int thumb_core_reg_alloc_order[] =
28038 {
28039 3, 2, 1, 0, 4, 5, 6, 7,
28040 12, 14, 8, 9, 10, 11
28041 };
28042
28043 /* Adjust register allocation order when compiling for Thumb. */
28044
28045 void
28046 arm_order_regs_for_local_alloc (void)
28047 {
28048 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28049 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28050 if (TARGET_THUMB)
28051 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28052 sizeof (thumb_core_reg_alloc_order));
28053 }
28054
28055 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28056
28057 bool
28058 arm_frame_pointer_required (void)
28059 {
28060 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28061 return true;
28062
28063 /* If the function receives nonlocal gotos, it needs to save the frame
28064 pointer in the nonlocal_goto_save_area object. */
28065 if (cfun->has_nonlocal_label)
28066 return true;
28067
28068 /* The frame pointer is required for non-leaf APCS frames. */
28069 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28070 return true;
28071
28072 /* If we are probing the stack in the prologue, we will have a faulting
28073 instruction prior to the stack adjustment and this requires a frame
28074 pointer if we want to catch the exception using the EABI unwinder. */
28075 if (!IS_INTERRUPT (arm_current_func_type ())
28076 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28077 || flag_stack_clash_protection)
28078 && arm_except_unwind_info (&global_options) == UI_TARGET
28079 && cfun->can_throw_non_call_exceptions)
28080 {
28081 HOST_WIDE_INT size = get_frame_size ();
28082
28083 /* That's irrelevant if there is no stack adjustment. */
28084 if (size <= 0)
28085 return false;
28086
28087 /* That's relevant only if there is a stack probe. */
28088 if (crtl->is_leaf && !cfun->calls_alloca)
28089 {
28090 /* We don't have the final size of the frame so adjust. */
28091 size += 32 * UNITS_PER_WORD;
28092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28093 return true;
28094 }
28095 else
28096 return true;
28097 }
28098
28099 return false;
28100 }
28101
28102 /* Thumb-1 is the only target that cannot support conditional execution,
28103 so return true if the target is not Thumb-1. */
28104 static bool
28105 arm_have_conditional_execution (void)
28106 {
28107 return !TARGET_THUMB1;
28108 }
28109
28110 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28111 static HOST_WIDE_INT
28112 arm_vector_alignment (const_tree type)
28113 {
28114 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28115
28116 if (TARGET_AAPCS_BASED)
28117 align = MIN (align, 64);
28118
28119 return align;
28120 }
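/* For instance (illustrative): a 128-bit NEON vector type such as
   int32x4_t has a 128-bit size, but under AAPCS its alignment is capped
   at 64 bits by the code above; non-AAPCS targets keep the full 128-bit
   alignment.  */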
28121
28122 static void
28123 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28124 {
28125 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28126 {
28127 sizes->safe_push (16);
28128 sizes->safe_push (8);
28129 }
28130 }
28131
28132 static bool
28133 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28134 {
28135 /* Vectors which aren't in packed structures will not be less aligned than
28136 the natural alignment of their element type, so this is safe. */
28137 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28138 return !is_packed;
28139
28140 return default_builtin_vector_alignment_reachable (type, is_packed);
28141 }
28142
28143 static bool
28144 arm_builtin_support_vector_misalignment (machine_mode mode,
28145 const_tree type, int misalignment,
28146 bool is_packed)
28147 {
28148 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28149 {
28150 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28151
28152 if (is_packed)
28153 return align == 1;
28154
28155 /* If the misalignment is unknown, we should be able to handle the access
28156 so long as it is not to a member of a packed data structure. */
28157 if (misalignment == -1)
28158 return true;
28159
28160 /* Return true if the misalignment is a multiple of the natural alignment
28161 of the vector's element type. This is probably always going to be
28162 true in practice, since we've already established that this isn't a
28163 packed access. */
28164 return ((misalignment % align) == 0);
28165 }
28166
28167 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28168 is_packed);
28169 }
28170
28171 static void
28172 arm_conditional_register_usage (void)
28173 {
28174 int regno;
28175
28176 if (TARGET_THUMB1 && optimize_size)
28177 {
28178 /* When optimizing for size on Thumb-1, it's better not
28179 to use the HI regs, because of the overhead of
28180 stacking them. */
28181 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28182 fixed_regs[regno] = call_used_regs[regno] = 1;
28183 }
28184
28185 /* The link register can be clobbered by any branch insn,
28186 but we have no way to track that at present, so mark
28187 it as unavailable. */
28188 if (TARGET_THUMB1)
28189 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28190
28191 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28192 {
28193 /* VFPv3 registers are disabled when earlier VFP
28194 versions are selected due to the definition of
28195 LAST_VFP_REGNUM. */
28196 for (regno = FIRST_VFP_REGNUM;
28197 regno <= LAST_VFP_REGNUM; ++ regno)
28198 {
28199 fixed_regs[regno] = 0;
28200 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28201 || regno >= FIRST_VFP_REGNUM + 32;
28202 }
28203 }
28204
28205 if (TARGET_REALLY_IWMMXT)
28206 {
28207 regno = FIRST_IWMMXT_GR_REGNUM;
28208 /* The 2002/10/09 revision of the XScale ABI has wCG0
28209 and wCG1 as call-preserved registers. The 2002/11/21
28210 revision changed this so that all wCG registers are
28211 scratch registers. */
28212 for (regno = FIRST_IWMMXT_GR_REGNUM;
28213 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28214 fixed_regs[regno] = 0;
28215 /* The XScale ABI has wR0 - wR9 as scratch registers,
28216 the rest as call-preserved registers. */
28217 for (regno = FIRST_IWMMXT_REGNUM;
28218 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28219 {
28220 fixed_regs[regno] = 0;
28221 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28222 }
28223 }
28224
28225 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28226 {
28227 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28228 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28229 }
28230 else if (TARGET_APCS_STACK)
28231 {
28232 fixed_regs[10] = 1;
28233 call_used_regs[10] = 1;
28234 }
28235 /* -mcaller-super-interworking reserves r11 for calls to
28236 _interwork_r11_call_via_rN(). Making the register global
28237 is an easy way of ensuring that it remains valid for all
28238 calls. */
28239 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28240 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28241 {
28242 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28243 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28244 if (TARGET_CALLER_INTERWORKING)
28245 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28246 }
28247 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28248 }
28249
28250 static reg_class_t
28251 arm_preferred_rename_class (reg_class_t rclass)
28252 {
28253 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28254 using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
28255 so code size can be reduced. */
28256 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28257 return LO_REGS;
28258 else
28259 return NO_REGS;
28260 }
28261
28262 /* Compute the attribute "length" of insn "*push_multi".
28263 So this function MUST be kept in sync with that insn pattern. */
28264 int
28265 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28266 {
28267 int i, regno, hi_reg;
28268 int num_saves = XVECLEN (parallel_op, 0);
28269
28270 /* ARM mode. */
28271 if (TARGET_ARM)
28272 return 4;
28273 /* Thumb1 mode. */
28274 if (TARGET_THUMB1)
28275 return 2;
28276
28277 /* Thumb2 mode. */
28278 regno = REGNO (first_op);
28279 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28280 list is 8-bit.  Normally this means all registers in the list must be
28281 LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use 32-bit
28282 encodings.  The one exception is PUSH, where LR (a HI_REG) can be used
28283 with the 16-bit encoding. */
28284 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28285 for (i = 1; i < num_saves && !hi_reg; i++)
28286 {
28287 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28288 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28289 }
28290
28291 if (!hi_reg)
28292 return 2;
28293 return 4;
28294 }
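/* For example (illustrative, Thumb-2): "push {r0-r7, lr}" fits the
   16-bit encoding and gets length 2, whereas "push {r0, r8}" or
   "stmdb sp!, {r4, r9}" involve a high register other than LR and
   therefore get length 4.  */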
28295
28296 /* Compute the attribute "length" of an insn.  Currently, this function is used
28297 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28298 "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28299 rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28300 true if OPERANDS contains an insn which explicitly updates the base register. */
28301
28302 int
28303 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28304 {
28305 /* ARM mode. */
28306 if (TARGET_ARM)
28307 return 4;
28308 /* Thumb1 mode. */
28309 if (TARGET_THUMB1)
28310 return 2;
28311
28312 rtx parallel_op = operands[0];
28313 /* Initialize to the number of elements in the PARALLEL. */
28314 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28315 /* Initialize to the base register number. */
28316 unsigned regno = REGNO (operands[1]);
28317 /* Skip the return and write-back patterns.
28318 We only need the register pop patterns for later analysis. */
28319 unsigned first_indx = 0;
28320 first_indx += return_pc ? 1 : 0;
28321 first_indx += write_back_p ? 1 : 0;
28322
28323 /* A pop operation can be done through LDM or POP.  If the base register is SP
28324 and write back is used, then LDM is an alias of POP. */
28325 bool pop_p = (regno == SP_REGNUM && write_back_p);
28326 bool ldm_p = !pop_p;
28327
28328 /* Check base register for LDM. */
28329 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28330 return 4;
28331
28332 /* Check each register in the list. */
28333 for (; indx >= first_indx; indx--)
28334 {
28335 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28336 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28337 comment in arm_attr_length_push_multi. */
28338 if (REGNO_REG_CLASS (regno) == HI_REGS
28339 && (regno != PC_REGNUM || ldm_p))
28340 return 4;
28341 }
28342
28343 return 2;
28344 }
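/* For example (illustrative, Thumb-2): "pop {r0-r7, pc}" uses the
   16-bit encoding (PC being the documented exception), so the length is
   2, while "ldmia r8!, {r0-r3}" needs the 32-bit encoding because the
   base register is a high register, so the length is 4.  */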
28345
28346 /* Compute the number of instructions emitted by output_move_double. */
28347 int
28348 arm_count_output_move_double_insns (rtx *operands)
28349 {
28350 int count;
28351 rtx ops[2];
28352 /* output_move_double may modify the operands array, so call it
28353 here on a copy of the array. */
28354 ops[0] = operands[0];
28355 ops[1] = operands[1];
28356 output_move_double (ops, false, &count);
28357 return count;
28358 }
28359
28360 int
28361 vfp3_const_double_for_fract_bits (rtx operand)
28362 {
28363 REAL_VALUE_TYPE r0;
28364
28365 if (!CONST_DOUBLE_P (operand))
28366 return 0;
28367
28368 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28369 if (exact_real_inverse (DFmode, &r0)
28370 && !REAL_VALUE_NEGATIVE (r0))
28371 {
28372 if (exact_real_truncate (DFmode, &r0))
28373 {
28374 HOST_WIDE_INT value = real_to_integer (&r0);
28375 value = value & 0xffffffff;
28376 if ((value != 0) && ( (value & (value - 1)) == 0))
28377 {
28378 int ret = exact_log2 (value);
28379 gcc_assert (IN_RANGE (ret, 0, 31));
28380 return ret;
28381 }
28382 }
28383 }
28384 return 0;
28385 }
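/* Example (illustrative): for the constant 0.25 the exact reciprocal is
   4.0, which truncates exactly to the integer 4 = 1 << 2, so the
   function returns 2, the number of fraction bits for a fixed-point
   vcvt.  A constant whose reciprocal is not an exact power of two, such
   as 0.3, yields 0.  */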
28386
28387 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28388 log2 is in [1, 32], return that log2. Otherwise return -1.
28389 This is used in the patterns for vcvt.s32.f32 floating-point to
28390 fixed-point conversions. */
28391
28392 int
28393 vfp3_const_double_for_bits (rtx x)
28394 {
28395 const REAL_VALUE_TYPE *r;
28396
28397 if (!CONST_DOUBLE_P (x))
28398 return -1;
28399
28400 r = CONST_DOUBLE_REAL_VALUE (x);
28401
28402 if (REAL_VALUE_NEGATIVE (*r)
28403 || REAL_VALUE_ISNAN (*r)
28404 || REAL_VALUE_ISINF (*r)
28405 || !real_isinteger (r, SFmode))
28406 return -1;
28407
28408 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28409
28410 /* The exact_log2 above will have returned -1 if this is
28411 not an exact log2. */
28412 if (!IN_RANGE (hwint, 1, 32))
28413 return -1;
28414
28415 return hwint;
28416 }
28417
28418 \f
28419 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28420
28421 static void
28422 arm_pre_atomic_barrier (enum memmodel model)
28423 {
28424 if (need_atomic_barrier_p (model, true))
28425 emit_insn (gen_memory_barrier ());
28426 }
28427
28428 static void
28429 arm_post_atomic_barrier (enum memmodel model)
28430 {
28431 if (need_atomic_barrier_p (model, false))
28432 emit_insn (gen_memory_barrier ());
28433 }
28434
28435 /* Emit the load-exclusive and store-exclusive instructions.
28436 Use acquire and release versions if necessary. */
28437
28438 static void
28439 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28440 {
28441 rtx (*gen) (rtx, rtx);
28442
28443 if (acq)
28444 {
28445 switch (mode)
28446 {
28447 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28448 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28449 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28450 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28451 default:
28452 gcc_unreachable ();
28453 }
28454 }
28455 else
28456 {
28457 switch (mode)
28458 {
28459 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28460 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28461 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28462 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28463 default:
28464 gcc_unreachable ();
28465 }
28466 }
28467
28468 emit_insn (gen (rval, mem));
28469 }
28470
28471 static void
28472 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28473 rtx mem, bool rel)
28474 {
28475 rtx (*gen) (rtx, rtx, rtx);
28476
28477 if (rel)
28478 {
28479 switch (mode)
28480 {
28481 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28482 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28483 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28484 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28485 default:
28486 gcc_unreachable ();
28487 }
28488 }
28489 else
28490 {
28491 switch (mode)
28492 {
28493 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28494 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28495 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28496 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28497 default:
28498 gcc_unreachable ();
28499 }
28500 }
28501
28502 emit_insn (gen (bval, rval, mem));
28503 }
28504
28505 /* Mark the previous jump instruction as unlikely. */
28506
28507 static void
28508 emit_unlikely_jump (rtx insn)
28509 {
28510 rtx_insn *jump = emit_jump_insn (insn);
28511 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28512 }
28513
28514 /* Expand a compare and swap pattern. */
28515
28516 void
28517 arm_expand_compare_and_swap (rtx operands[])
28518 {
28519 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28520 machine_mode mode;
28521 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28522
28523 bval = operands[0];
28524 rval = operands[1];
28525 mem = operands[2];
28526 oldval = operands[3];
28527 newval = operands[4];
28528 is_weak = operands[5];
28529 mod_s = operands[6];
28530 mod_f = operands[7];
28531 mode = GET_MODE (mem);
28532
28533 /* Normally the succ memory model must be stronger than fail, but in the
28534 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28535 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
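/* A hypothetical call that reaches this case:
   __atomic_compare_exchange_n (p, &expected, desired, 0,
   __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
   arrives here with mod_s == RELEASE and mod_f == ACQUIRE and is
   promoted to ACQ_REL below.  */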
28536
28537 if (TARGET_HAVE_LDACQ
28538 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28539 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28540 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28541
28542 switch (mode)
28543 {
28544 case E_QImode:
28545 case E_HImode:
28546 /* For narrow modes, we're going to perform the comparison in SImode,
28547 so do the zero-extension now. */
28548 rval = gen_reg_rtx (SImode);
28549 oldval = convert_modes (SImode, mode, oldval, true);
28550 /* FALLTHRU */
28551
28552 case E_SImode:
28553 /* Force the value into a register if needed. We waited until after
28554 the zero-extension above to do this properly. */
28555 if (!arm_add_operand (oldval, SImode))
28556 oldval = force_reg (SImode, oldval);
28557 break;
28558
28559 case E_DImode:
28560 if (!cmpdi_operand (oldval, mode))
28561 oldval = force_reg (mode, oldval);
28562 break;
28563
28564 default:
28565 gcc_unreachable ();
28566 }
28567
28568 if (TARGET_THUMB1)
28569 {
28570 switch (mode)
28571 {
28572 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28573 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28574 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28575 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28576 default:
28577 gcc_unreachable ();
28578 }
28579 }
28580 else
28581 {
28582 switch (mode)
28583 {
28584 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28585 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28586 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28587 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28588 default:
28589 gcc_unreachable ();
28590 }
28591 }
28592
28593 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28594 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28595
28596 if (mode == QImode || mode == HImode)
28597 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28598
28599 /* In all cases, we arrange for success to be signaled by Z set.
28600 This arrangement allows for the boolean result to be used directly
28601 in a subsequent branch, post optimization. For Thumb-1 targets, the
28602 boolean negation of the result is also stored in bval because the Thumb-1
28603 backend lacks dependency tracking for the CC flag, as flag-setting is not
28604 represented at the RTL level. */
28605 if (TARGET_THUMB1)
28606 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28607 else
28608 {
28609 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28610 emit_insn (gen_rtx_SET (bval, x));
28611 }
28612 }
28613
28614 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28615 another memory store between the load-exclusive and store-exclusive can
28616 reset the monitor from Exclusive to Open state. This means we must wait
28617 until after reload to split the pattern, lest we get a register spill in
28618 the middle of the atomic sequence. Success of the compare and swap is
28619 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
28620 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28621 atomic_compare_and_swapmode standard pattern in operand 0). */
28622
28623 void
28624 arm_split_compare_and_swap (rtx operands[])
28625 {
28626 rtx rval, mem, oldval, newval, neg_bval;
28627 machine_mode mode;
28628 enum memmodel mod_s, mod_f;
28629 bool is_weak;
28630 rtx_code_label *label1, *label2;
28631 rtx x, cond;
28632
28633 rval = operands[1];
28634 mem = operands[2];
28635 oldval = operands[3];
28636 newval = operands[4];
28637 is_weak = (operands[5] != const0_rtx);
28638 mod_s = memmodel_from_int (INTVAL (operands[6]));
28639 mod_f = memmodel_from_int (INTVAL (operands[7]));
28640 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28641 mode = GET_MODE (mem);
28642
28643 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28644
28645 bool use_acquire = TARGET_HAVE_LDACQ
28646 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28647 || is_mm_release (mod_s));
28648
28649 bool use_release = TARGET_HAVE_LDACQ
28650 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28651 || is_mm_acquire (mod_s));
28652
28653 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28654 a full barrier is emitted after the store-release. */
28655 if (is_armv8_sync)
28656 use_acquire = false;
28657
28658 /* Checks whether a barrier is needed and emits one accordingly. */
28659 if (!(use_acquire || use_release))
28660 arm_pre_atomic_barrier (mod_s);
28661
28662 label1 = NULL;
28663 if (!is_weak)
28664 {
28665 label1 = gen_label_rtx ();
28666 emit_label (label1);
28667 }
28668 label2 = gen_label_rtx ();
28669
28670 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28671
28672 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28673 as required to communicate with arm_expand_compare_and_swap. */
28674 if (TARGET_32BIT)
28675 {
28676 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28677 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28678 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28679 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28680 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28681 }
28682 else
28683 {
28684 emit_move_insn (neg_bval, const1_rtx);
28685 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28686 if (thumb1_cmpneg_operand (oldval, SImode))
28687 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28688 label2, cond));
28689 else
28690 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28691 }
28692
28693 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28694
28695 /* Weak or strong, we want EQ to be true for success, so that we
28696 match the flags that we got from the compare above. */
28697 if (TARGET_32BIT)
28698 {
28699 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28700 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28701 emit_insn (gen_rtx_SET (cond, x));
28702 }
28703
28704 if (!is_weak)
28705 {
28706 /* Z is set to boolean value of !neg_bval, as required to communicate
28707 with arm_expand_compare_and_swap. */
28708 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28709 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28710 }
28711
28712 if (!is_mm_relaxed (mod_f))
28713 emit_label (label2);
28714
28715 /* Checks whether a barrier is needed and emits one accordingly. */
28716 if (is_armv8_sync
28717 || !(use_acquire || use_release))
28718 arm_post_atomic_barrier (mod_s);
28719
28720 if (is_mm_relaxed (mod_f))
28721 emit_label (label2);
28722 }
28723
28724 /* Split an atomic operation pattern. Operation is given by CODE and is one
28725 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28726 operation). Operation is performed on the content at MEM and on VALUE
28727 following the memory model MODEL_RTX. The content at MEM before and after
28728 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28729 success of the operation is returned in COND. Using a scratch register or
28730 an operand register for these determines what result is returned for that
28731 pattern. */
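/* For illustration only: an SImode atomic fetch-and-add on an ARM-mode
   target splits into a loop of roughly this shape (register names are
   hypothetical, and barriers or acquire/release forms may surround it
   depending on MODEL_RTX):

	.Lretry:
	ldrex	r0, [r3]	@ old_out = *mem
	add	r1, r0, r2	@ new_out = old_out + value
	strex	ip, r1, [r3]	@ ip = 0 iff new_out was stored
	cmp	ip, #0
	bne	.Lretry		@ reservation lost, try again
*/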
28732
28733 void
28734 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28735 rtx value, rtx model_rtx, rtx cond)
28736 {
28737 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28738 machine_mode mode = GET_MODE (mem);
28739 machine_mode wmode = (mode == DImode ? DImode : SImode);
28740 rtx_code_label *label;
28741 bool all_low_regs, bind_old_new;
28742 rtx x;
28743
28744 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28745
28746 bool use_acquire = TARGET_HAVE_LDACQ
28747 && !(is_mm_relaxed (model) || is_mm_consume (model)
28748 || is_mm_release (model));
28749
28750 bool use_release = TARGET_HAVE_LDACQ
28751 && !(is_mm_relaxed (model) || is_mm_consume (model)
28752 || is_mm_acquire (model));
28753
28754 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28755 a full barrier is emitted after the store-release. */
28756 if (is_armv8_sync)
28757 use_acquire = false;
28758
28759 /* Checks whether a barrier is needed and emits one accordingly. */
28760 if (!(use_acquire || use_release))
28761 arm_pre_atomic_barrier (model);
28762
28763 label = gen_label_rtx ();
28764 emit_label (label);
28765
28766 if (new_out)
28767 new_out = gen_lowpart (wmode, new_out);
28768 if (old_out)
28769 old_out = gen_lowpart (wmode, old_out);
28770 else
28771 old_out = new_out;
28772 value = simplify_gen_subreg (wmode, value, mode, 0);
28773
28774 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28775
28776 /* Does the operation require the destination and first operand to use the
28777 same register? This is decided by the register constraints of the relevant
28778 insn patterns in thumb1.md. */
28779 gcc_assert (!new_out || REG_P (new_out));
28780 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28781 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28782 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28783 bind_old_new =
28784 (TARGET_THUMB1
28785 && code != SET
28786 && code != MINUS
28787 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28788
28789 /* We want to return the old value while putting the result of the operation
28790 in the same register as the old value, so copy the old value over to the
28791 destination register and use that register for the operation. */
28792 if (old_out && bind_old_new)
28793 {
28794 emit_move_insn (new_out, old_out);
28795 old_out = new_out;
28796 }
28797
28798 switch (code)
28799 {
28800 case SET:
28801 new_out = value;
28802 break;
28803
28804 case NOT:
28805 x = gen_rtx_AND (wmode, old_out, value);
28806 emit_insn (gen_rtx_SET (new_out, x));
28807 x = gen_rtx_NOT (wmode, new_out);
28808 emit_insn (gen_rtx_SET (new_out, x));
28809 break;
28810
28811 case MINUS:
28812 if (CONST_INT_P (value))
28813 {
28814 value = GEN_INT (-INTVAL (value));
28815 code = PLUS;
28816 }
28817 /* FALLTHRU */
28818
28819 case PLUS:
28820 if (mode == DImode)
28821 {
28822 /* DImode plus/minus need to clobber flags. */
28823 /* The adddi3 and subdi3 patterns are incorrectly written so that
28824 they require matching operands, even when we could easily support
28825 three operands. Thankfully, this can be fixed up post-splitting,
28826 as the individual add+adc patterns do accept three operands and
28827 post-reload cprop can make these moves go away. */
28828 emit_move_insn (new_out, old_out);
28829 if (code == PLUS)
28830 x = gen_adddi3 (new_out, new_out, value);
28831 else
28832 x = gen_subdi3 (new_out, new_out, value);
28833 emit_insn (x);
28834 break;
28835 }
28836 /* FALLTHRU */
28837
28838 default:
28839 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28840 emit_insn (gen_rtx_SET (new_out, x));
28841 break;
28842 }
28843
28844 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28845 use_release);
28846
28847 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28848 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28849
28850 /* Checks whether a barrier is needed and emits one accordingly. */
28851 if (is_armv8_sync
28852 || !(use_acquire || use_release))
28853 arm_post_atomic_barrier (model);
28854 }
28855 \f
28856 #define MAX_VECT_LEN 16
28857
28858 struct expand_vec_perm_d
28859 {
28860 rtx target, op0, op1;
28861 vec_perm_indices perm;
28862 machine_mode vmode;
28863 bool one_vector_p;
28864 bool testing_p;
28865 };
28866
28867 /* Generate a variable permutation. */
28868
28869 static void
28870 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28871 {
28872 machine_mode vmode = GET_MODE (target);
28873 bool one_vector_p = rtx_equal_p (op0, op1);
28874
28875 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28876 gcc_checking_assert (GET_MODE (op0) == vmode);
28877 gcc_checking_assert (GET_MODE (op1) == vmode);
28878 gcc_checking_assert (GET_MODE (sel) == vmode);
28879 gcc_checking_assert (TARGET_NEON);
28880
28881 if (one_vector_p)
28882 {
28883 if (vmode == V8QImode)
28884 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28885 else
28886 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28887 }
28888 else
28889 {
28890 rtx pair;
28891
28892 if (vmode == V8QImode)
28893 {
28894 pair = gen_reg_rtx (V16QImode);
28895 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28896 pair = gen_lowpart (TImode, pair);
28897 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28898 }
28899 else
28900 {
28901 pair = gen_reg_rtx (OImode);
28902 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28903 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28904 }
28905 }
28906 }
28907
28908 void
28909 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28910 {
28911 machine_mode vmode = GET_MODE (target);
28912 unsigned int nelt = GET_MODE_NUNITS (vmode);
28913 bool one_vector_p = rtx_equal_p (op0, op1);
28914 rtx mask;
28915
28916 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28917 numbering of elements for big-endian, we must reverse the order. */
28918 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28919
28920 /* The VTBL instruction does not use a modulo index, so we must take care
28921 of that ourselves. */
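/* For example, with a single V8QImode input (one_vector_p), a selector
   element of 9 is reduced to 9 & 7 == 1 by the AND below, giving the
   modulo semantics VEC_PERM_EXPR requires; an unmasked out-of-range
   VTBL index would instead produce zero. (Illustrative only.) */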
28922 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28923 mask = gen_const_vec_duplicate (vmode, mask);
28924 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28925
28926 arm_expand_vec_perm_1 (target, op0, op1, sel);
28927 }
28928
28929 /* Map lane ordering between architectural lane order, and GCC lane order,
28930 taking into account ABI. See comment above output_move_neon for details. */
28931
28932 static int
28933 neon_endian_lane_map (machine_mode mode, int lane)
28934 {
28935 if (BYTES_BIG_ENDIAN)
28936 {
28937 int nelems = GET_MODE_NUNITS (mode);
28938 /* Reverse lane order. */
28939 lane = (nelems - 1 - lane);
28940 /* Reverse D register order, to match ABI. */
28941 if (GET_MODE_SIZE (mode) == 16)
28942 lane = lane ^ (nelems / 2);
28943 }
28944 return lane;
28945 }
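/* As an illustration: for V4SImode on a big-endian target, nelems == 4 and
   the mode is 16 bytes wide, so lanes 0, 1, 2, 3 map to 1, 0, 3, 2
   (reverse to 3, 2, 1, 0, then XOR with nelems / 2 == 2 to swap the two
   D registers). On little-endian targets the mapping is the identity. */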
28946
28947 /* Some permutations index into pairs of vectors, this is a helper function
28948 to map indexes into those pairs of vectors. */
28949
28950 static int
28951 neon_pair_endian_lane_map (machine_mode mode, int lane)
28952 {
28953 int nelem = GET_MODE_NUNITS (mode);
28954 if (BYTES_BIG_ENDIAN)
28955 lane =
28956 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28957 return lane;
28958 }
28959
28960 /* Generate or test for an insn that supports a constant permutation. */
28961
28962 /* Recognize patterns for the VUZP insns. */
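/* For illustration (little-endian): for V8HImode with two input vectors,
   the selector {0, 2, 4, 6, 8, 10, 12, 14} (odd == 0) or
   {1, 3, 5, 7, 9, 11, 13, 15} (odd == 1) matches VUZP, which
   de-interleaves the even or odd lanes of the concatenated inputs. */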
28963
28964 static bool
28965 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28966 {
28967 unsigned int i, odd, mask, nelt = d->perm.length ();
28968 rtx out0, out1, in0, in1;
28969 rtx (*gen)(rtx, rtx, rtx, rtx);
28970 int first_elem;
28971 int swap_nelt;
28972
28973 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28974 return false;
28975
28976 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28977 big-endian pattern on 64-bit vectors, so we correct for that. */
28978 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28979 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28980
28981 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28982
28983 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28984 odd = 0;
28985 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28986 odd = 1;
28987 else
28988 return false;
28989 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28990
28991 for (i = 0; i < nelt; i++)
28992 {
28993 unsigned elt =
28994 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28995 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28996 return false;
28997 }
28998
28999 /* Success! */
29000 if (d->testing_p)
29001 return true;
29002
29003 switch (d->vmode)
29004 {
29005 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29006 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29007 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29008 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29009 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29010 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29011 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29012 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29013 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29014 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29015 default:
29016 gcc_unreachable ();
29017 }
29018
29019 in0 = d->op0;
29020 in1 = d->op1;
29021 if (swap_nelt != 0)
29022 std::swap (in0, in1);
29023
29024 out0 = d->target;
29025 out1 = gen_reg_rtx (d->vmode);
29026 if (odd)
29027 std::swap (out0, out1);
29028
29029 emit_insn (gen (out0, in0, in1, out1));
29030 return true;
29031 }
29032
29033 /* Recognize patterns for the VZIP insns. */
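/* For illustration (little-endian): for V8HImode with two input vectors,
   the selector {0, 8, 1, 9, 2, 10, 3, 11} (high == 0) or
   {4, 12, 5, 13, 6, 14, 7, 15} (high == nelt / 2) matches VZIP, which
   interleaves the corresponding halves of the two inputs. */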
29034
29035 static bool
29036 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29037 {
29038 unsigned int i, high, mask, nelt = d->perm.length ();
29039 rtx out0, out1, in0, in1;
29040 rtx (*gen)(rtx, rtx, rtx, rtx);
29041 int first_elem;
29042 bool is_swapped;
29043
29044 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29045 return false;
29046
29047 is_swapped = BYTES_BIG_ENDIAN;
29048
29049 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29050
29051 high = nelt / 2;
29052 if (first_elem == neon_endian_lane_map (d->vmode, high))
29053 ;
29054 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29055 high = 0;
29056 else
29057 return false;
29058 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29059
29060 for (i = 0; i < nelt / 2; i++)
29061 {
29062 unsigned elt =
29063 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29064 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29065 != elt)
29066 return false;
29067 elt =
29068 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29069 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29070 != elt)
29071 return false;
29072 }
29073
29074 /* Success! */
29075 if (d->testing_p)
29076 return true;
29077
29078 switch (d->vmode)
29079 {
29080 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29081 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29082 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29083 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29084 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29085 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29086 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29087 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29088 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29089 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29090 default:
29091 gcc_unreachable ();
29092 }
29093
29094 in0 = d->op0;
29095 in1 = d->op1;
29096 if (is_swapped)
29097 std::swap (in0, in1);
29098
29099 out0 = d->target;
29100 out1 = gen_reg_rtx (d->vmode);
29101 if (high)
29102 std::swap (out0, out1);
29103
29104 emit_insn (gen (out0, in0, in1, out1));
29105 return true;
29106 }
29107
29108 /* Recognize patterns for the VREV insns. */
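/* For illustration: with a single V16QImode input, the selector
   {3, 2, 1, 0, 7, 6, 5, 4, ...} (diff == 3) reverses the bytes within
   each 32-bit word and maps to VREV32.8; diff == 7 maps to VREV64.8 and
   diff == 1 to VREV16.8. */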
29109
29110 static bool
29111 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29112 {
29113 unsigned int i, j, diff, nelt = d->perm.length ();
29114 rtx (*gen)(rtx, rtx);
29115
29116 if (!d->one_vector_p)
29117 return false;
29118
29119 diff = d->perm[0];
29120 switch (diff)
29121 {
29122 case 7:
29123 switch (d->vmode)
29124 {
29125 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29126 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29127 default:
29128 return false;
29129 }
29130 break;
29131 case 3:
29132 switch (d->vmode)
29133 {
29134 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29135 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29136 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29137 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29138 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29139 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29140 default:
29141 return false;
29142 }
29143 break;
29144 case 1:
29145 switch (d->vmode)
29146 {
29147 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29148 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29149 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29150 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29151 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29152 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29153 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29154 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29155 default:
29156 return false;
29157 }
29158 break;
29159 default:
29160 return false;
29161 }
29162
29163 for (i = 0; i < nelt ; i += diff + 1)
29164 for (j = 0; j <= diff; j += 1)
29165 {
29166 /* This is guaranteed to be true as the value of diff
29167 is 7, 3 or 1, and we should have enough elements in the
29168 queue to generate this. Getting a vector mask with a
29169 value of diff other than these implies that something
29170 has gone wrong by the time we get here. */
29171 gcc_assert (i + j < nelt);
29172 if (d->perm[i + j] != i + diff - j)
29173 return false;
29174 }
29175
29176 /* Success! */
29177 if (d->testing_p)
29178 return true;
29179
29180 emit_insn (gen (d->target, d->op0));
29181 return true;
29182 }
29183
29184 /* Recognize patterns for the VTRN insns. */
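/* For illustration (little-endian): for V4SImode with two input vectors,
   the selector {0, 4, 2, 6} (odd == 0) or {1, 5, 3, 7} (odd == 1)
   matches VTRN, which transposes pairs of corresponding lanes between
   the two inputs. */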
29185
29186 static bool
29187 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29188 {
29189 unsigned int i, odd, mask, nelt = d->perm.length ();
29190 rtx out0, out1, in0, in1;
29191 rtx (*gen)(rtx, rtx, rtx, rtx);
29192
29193 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29194 return false;
29195
29196 /* Note that these are little-endian tests. Adjust for big-endian later. */
29197 if (d->perm[0] == 0)
29198 odd = 0;
29199 else if (d->perm[0] == 1)
29200 odd = 1;
29201 else
29202 return false;
29203 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29204
29205 for (i = 0; i < nelt; i += 2)
29206 {
29207 if (d->perm[i] != i + odd)
29208 return false;
29209 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29210 return false;
29211 }
29212
29213 /* Success! */
29214 if (d->testing_p)
29215 return true;
29216
29217 switch (d->vmode)
29218 {
29219 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29220 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29221 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29222 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29223 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29224 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29225 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29226 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29227 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29228 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29229 default:
29230 gcc_unreachable ();
29231 }
29232
29233 in0 = d->op0;
29234 in1 = d->op1;
29235 if (BYTES_BIG_ENDIAN)
29236 {
29237 std::swap (in0, in1);
29238 odd = !odd;
29239 }
29240
29241 out0 = d->target;
29242 out1 = gen_reg_rtx (d->vmode);
29243 if (odd)
29244 std::swap (out0, out1);
29245
29246 emit_insn (gen (out0, in0, in1, out1));
29247 return true;
29248 }
29249
29250 /* Recognize patterns for the VEXT insns. */
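/* For illustration (little-endian): for V8QImode with two input vectors,
   the selector {3, 4, 5, 6, 7, 8, 9, 10} consists of indexes increasing
   by one from 3, so it matches VEXT with an offset of 3, extracting a
   window that spans the boundary between the two inputs. */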
29251
29252 static bool
29253 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29254 {
29255 unsigned int i, nelt = d->perm.length ();
29256 rtx (*gen) (rtx, rtx, rtx, rtx);
29257 rtx offset;
29258
29259 unsigned int location;
29260
29261 unsigned int next = d->perm[0] + 1;
29262
29263 /* TODO: Handle GCC's numbering of elements for big-endian. */
29264 if (BYTES_BIG_ENDIAN)
29265 return false;
29266
29267 /* Check if the extracted indexes are increasing by one. */
29268 for (i = 1; i < nelt; next++, i++)
29269 {
29270 /* If we hit the most significant element of the 2nd vector in
29271 the previous iteration, no need to test further. */
29272 if (next == 2 * nelt)
29273 return false;
29274
29275 /* If we are operating on only one vector: it could be a
29276 rotation. If there are only two elements of size < 64, let
29277 arm_evpc_neon_vrev catch it. */
29278 if (d->one_vector_p && (next == nelt))
29279 {
29280 if ((nelt == 2) && (d->vmode != V2DImode))
29281 return false;
29282 else
29283 next = 0;
29284 }
29285
29286 if (d->perm[i] != next)
29287 return false;
29288 }
29289
29290 location = d->perm[0];
29291
29292 switch (d->vmode)
29293 {
29294 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29295 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29296 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29297 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29298 case E_V2SImode: gen = gen_neon_vextv2si; break;
29299 case E_V4SImode: gen = gen_neon_vextv4si; break;
29300 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29301 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29302 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29303 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29304 case E_V2DImode: gen = gen_neon_vextv2di; break;
29305 default:
29306 return false;
29307 }
29308
29309 /* Success! */
29310 if (d->testing_p)
29311 return true;
29312
29313 offset = GEN_INT (location);
29314 emit_insn (gen (d->target, d->op0, d->op1, offset));
29315 return true;
29316 }
29317
29318 /* The NEON VTBL instruction is a fully variable permutation that's even
29319 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29320 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29321 can do slightly better by expanding this as a constant where we don't
29322 have to apply a mask. */
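/* For example, a V8QImode selector such as {0, 7, 1, 6, 2, 5, 3, 4}, which
   matches none of the structured VUZP/VZIP/VREV/VTRN/VEXT patterns above,
   is loaded as a constant vector into a register and handled with a single
   VTBL lookup. (Illustrative only.) */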
29323
29324 static bool
29325 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29326 {
29327 rtx rperm[MAX_VECT_LEN], sel;
29328 machine_mode vmode = d->vmode;
29329 unsigned int i, nelt = d->perm.length ();
29330
29331 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29332 numbering of elements for big-endian, we must reverse the order. */
29333 if (BYTES_BIG_ENDIAN)
29334 return false;
29335
29336 if (d->testing_p)
29337 return true;
29338
29339 /* Generic code will try constant permutation twice: once with the
29340 original mode and again with the elements lowered to QImode.
29341 So wait and don't do the selector expansion ourselves. */
29342 if (vmode != V8QImode && vmode != V16QImode)
29343 return false;
29344
29345 for (i = 0; i < nelt; ++i)
29346 rperm[i] = GEN_INT (d->perm[i]);
29347 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29348 sel = force_reg (vmode, sel);
29349
29350 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29351 return true;
29352 }
29353
29354 static bool
29355 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29356 {
29357 /* Check if the input mask matches vext before reordering the
29358 operands. */
29359 if (TARGET_NEON)
29360 if (arm_evpc_neon_vext (d))
29361 return true;
29362
29363 /* The pattern matching functions above are written to look for a small
29364 number to begin the sequence (0, 1, N/2). If we begin with an index
29365 from the second operand, we can swap the operands. */
29366 unsigned int nelt = d->perm.length ();
29367 if (d->perm[0] >= nelt)
29368 {
29369 d->perm.rotate_inputs (1);
29370 std::swap (d->op0, d->op1);
29371 }
29372
29373 if (TARGET_NEON)
29374 {
29375 if (arm_evpc_neon_vuzp (d))
29376 return true;
29377 if (arm_evpc_neon_vzip (d))
29378 return true;
29379 if (arm_evpc_neon_vrev (d))
29380 return true;
29381 if (arm_evpc_neon_vtrn (d))
29382 return true;
29383 return arm_evpc_neon_vtbl (d);
29384 }
29385 return false;
29386 }
29387
29388 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29389
29390 static bool
29391 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29392 const vec_perm_indices &sel)
29393 {
29394 struct expand_vec_perm_d d;
29395 int i, nelt, which;
29396
29397 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29398 return false;
29399
29400 d.target = target;
29401 d.op0 = op0;
29402 d.op1 = op1;
29403
29404 d.vmode = vmode;
29405 gcc_assert (VECTOR_MODE_P (d.vmode));
29406 d.testing_p = !target;
29407
29408 nelt = GET_MODE_NUNITS (d.vmode);
29409 for (i = which = 0; i < nelt; ++i)
29410 {
29411 int ei = sel[i] & (2 * nelt - 1);
29412 which |= (ei < nelt ? 1 : 2);
29413 }
29414
29415 switch (which)
29416 {
29417 default:
29418 gcc_unreachable();
29419
29420 case 3:
29421 d.one_vector_p = false;
29422 if (d.testing_p || !rtx_equal_p (op0, op1))
29423 break;
29424
29425 /* The elements of PERM do not suggest that only the first operand
29426 is used, but both operands are identical. Allow easier matching
29427 of the permutation by folding the permutation into the single
29428 input vector. */
29429 /* FALLTHRU */
29430 case 2:
29431 d.op0 = op1;
29432 d.one_vector_p = true;
29433 break;
29434
29435 case 1:
29436 d.op1 = op0;
29437 d.one_vector_p = true;
29438 break;
29439 }
29440
29441 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29442
29443 if (!d.testing_p)
29444 return arm_expand_vec_perm_const_1 (&d);
29445
29446 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29447 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29448 if (!d.one_vector_p)
29449 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29450
29451 start_sequence ();
29452 bool ret = arm_expand_vec_perm_const_1 (&d);
29453 end_sequence ();
29454
29455 return ret;
29456 }
29457
29458 bool
29459 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29460 {
29461 /* If we are soft float and either we have ldrd or the mode fits in a
29462 single word, then all auto-increment forms are ok. */
29463 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29464 return true;
29465
29466 switch (code)
29467 {
29468 /* Post-increment and pre-decrement are supported for all
29469 instruction forms except for vector forms. */
29470 case ARM_POST_INC:
29471 case ARM_PRE_DEC:
29472 if (VECTOR_MODE_P (mode))
29473 {
29474 if (code != ARM_PRE_DEC)
29475 return true;
29476 else
29477 return false;
29478 }
29479
29480 return true;
29481
29482 case ARM_POST_DEC:
29483 case ARM_PRE_INC:
29484 /* Without LDRD, and with a mode size greater than the
29485 word size, there is no point in auto-incrementing
29486 because ldm and stm will not have these forms. */
29487 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29488 return false;
29489
29490 /* Vector and floating point modes do not support
29491 these auto increment forms. */
29492 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29493 return false;
29494
29495 return true;
29496
29497 default:
29498 return false;
29499
29500 }
29501
29502 return false;
29503 }
29504
29505 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29506 on ARM, since we know that shifts by negative amounts are no-ops.
29507 Additionally, the default expansion code is not available or suitable
29508 for post-reload insn splits (this can occur when the register allocator
29509 chooses not to do a shift in NEON).
29510
29511 This function is used in both initial expand and post-reload splits, and
29512 handles all kinds of 64-bit shifts.
29513
29514 Input requirements:
29515 - It is safe for the input and output to be the same register, but
29516 early-clobber rules apply for the shift amount and scratch registers.
29517 - Shift by register requires both scratch registers. In all other cases
29518 the scratch registers may be NULL.
29519 - Ashiftrt by a register also clobbers the CC register. */
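/* As an illustration, a constant left shift by 5 expands to roughly:

	out_low  = in_low << 5;
	out_high = (in_high << 5) | (in_low >> 27);

   while a constant shift amount of 64 or more yields zero (or a sign-fill
   from the high input word for arithmetic right shifts). */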
29520 void
29521 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29522 rtx amount, rtx scratch1, rtx scratch2)
29523 {
29524 rtx out_high = gen_highpart (SImode, out);
29525 rtx out_low = gen_lowpart (SImode, out);
29526 rtx in_high = gen_highpart (SImode, in);
29527 rtx in_low = gen_lowpart (SImode, in);
29528
29529 /* Terminology:
29530 in = the register pair containing the input value.
29531 out = the destination register pair.
29532 up = the high- or low-part of each pair.
29533 down = the opposite part to "up".
29534 In a shift, we can consider bits to shift from "up"-stream to
29535 "down"-stream, so in a left-shift "up" is the low-part and "down"
29536 is the high-part of each register pair. */
29537
29538 rtx out_up = code == ASHIFT ? out_low : out_high;
29539 rtx out_down = code == ASHIFT ? out_high : out_low;
29540 rtx in_up = code == ASHIFT ? in_low : in_high;
29541 rtx in_down = code == ASHIFT ? in_high : in_low;
29542
29543 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29544 gcc_assert (out
29545 && (REG_P (out) || GET_CODE (out) == SUBREG)
29546 && GET_MODE (out) == DImode);
29547 gcc_assert (in
29548 && (REG_P (in) || GET_CODE (in) == SUBREG)
29549 && GET_MODE (in) == DImode);
29550 gcc_assert (amount
29551 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29552 && GET_MODE (amount) == SImode)
29553 || CONST_INT_P (amount)));
29554 gcc_assert (scratch1 == NULL
29555 || (GET_CODE (scratch1) == SCRATCH)
29556 || (GET_MODE (scratch1) == SImode
29557 && REG_P (scratch1)));
29558 gcc_assert (scratch2 == NULL
29559 || (GET_CODE (scratch2) == SCRATCH)
29560 || (GET_MODE (scratch2) == SImode
29561 && REG_P (scratch2)));
29562 gcc_assert (!REG_P (out) || !REG_P (amount)
29563 || !HARD_REGISTER_P (out)
29564 || (REGNO (out) != REGNO (amount)
29565 && REGNO (out) + 1 != REGNO (amount)));
29566
29567 /* Macros to make following code more readable. */
29568 #define SUB_32(DEST,SRC) \
29569 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29570 #define RSB_32(DEST,SRC) \
29571 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29572 #define SUB_S_32(DEST,SRC) \
29573 gen_addsi3_compare0 ((DEST), (SRC), \
29574 GEN_INT (-32))
29575 #define SET(DEST,SRC) \
29576 gen_rtx_SET ((DEST), (SRC))
29577 #define SHIFT(CODE,SRC,AMOUNT) \
29578 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29579 #define LSHIFT(CODE,SRC,AMOUNT) \
29580 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29581 SImode, (SRC), (AMOUNT))
29582 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29583 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29584 SImode, (SRC), (AMOUNT))
29585 #define ORR(A,B) \
29586 gen_rtx_IOR (SImode, (A), (B))
29587 #define BRANCH(COND,LABEL) \
29588 gen_arm_cond_branch ((LABEL), \
29589 gen_rtx_ ## COND (CCmode, cc_reg, \
29590 const0_rtx), \
29591 cc_reg)
29592
29593 /* Shifts by register and shifts by constant are handled separately. */
29594 if (CONST_INT_P (amount))
29595 {
29596 /* We have a shift-by-constant. */
29597
29598 /* First, handle out-of-range shift amounts.
29599 In both cases we try to match the result that an ARM instruction in a
29600 shift-by-register would give. This helps reduce execution
29601 differences between optimization levels, but it won't stop other
29602 parts of the compiler doing different things. This is "undefined
29603 behavior", in any case. */
29604 if (INTVAL (amount) <= 0)
29605 emit_insn (gen_movdi (out, in));
29606 else if (INTVAL (amount) >= 64)
29607 {
29608 if (code == ASHIFTRT)
29609 {
29610 rtx const31_rtx = GEN_INT (31);
29611 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29612 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29613 }
29614 else
29615 emit_insn (gen_movdi (out, const0_rtx));
29616 }
29617
29618 /* Now handle valid shifts. */
29619 else if (INTVAL (amount) < 32)
29620 {
29621 /* Shifts by a constant less than 32. */
29622 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29623
29624 /* Clearing the out register in DImode first avoids lots
29625 of spilling and results in less stack usage.
29626 Later this redundant insn is completely removed.
29627 Do that only if "in" and "out" are different registers. */
29628 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29629 emit_insn (SET (out, const0_rtx));
29630 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29631 emit_insn (SET (out_down,
29632 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29633 out_down)));
29634 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29635 }
29636 else
29637 {
29638 /* Shifts by a constant greater than 31. */
29639 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29640
29641 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29642 emit_insn (SET (out, const0_rtx));
29643 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29644 if (code == ASHIFTRT)
29645 emit_insn (gen_ashrsi3 (out_up, in_up,
29646 GEN_INT (31)));
29647 else
29648 emit_insn (SET (out_up, const0_rtx));
29649 }
29650 }
29651 else
29652 {
29653 /* We have a shift-by-register. */
29654 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29655
29656 /* This alternative requires the scratch registers. */
29657 gcc_assert (scratch1 && REG_P (scratch1));
29658 gcc_assert (scratch2 && REG_P (scratch2));
29659
29660 /* We will need the values "amount-32" and "32-amount" later.
29661 Swapping them around now allows the later code to be more general. */
29662 switch (code)
29663 {
29664 case ASHIFT:
29665 emit_insn (SUB_32 (scratch1, amount));
29666 emit_insn (RSB_32 (scratch2, amount));
29667 break;
29668 case ASHIFTRT:
29669 emit_insn (RSB_32 (scratch1, amount));
29670 /* Also set CC = amount > 32. */
29671 emit_insn (SUB_S_32 (scratch2, amount));
29672 break;
29673 case LSHIFTRT:
29674 emit_insn (RSB_32 (scratch1, amount));
29675 emit_insn (SUB_32 (scratch2, amount));
29676 break;
29677 default:
29678 gcc_unreachable ();
29679 }
29680
29681 /* Emit code like this:
29682
29683 arithmetic-left:
29684 out_down = in_down << amount;
29685 out_down = (in_up << (amount - 32)) | out_down;
29686 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29687 out_up = in_up << amount;
29688
29689 arithmetic-right:
29690 out_down = in_down >> amount;
29691 out_down = (in_up << (32 - amount)) | out_down;
29692 if (amount >= 32)
29693 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29694 out_up = (signed)in_up >> amount;
29695 
29696 logical-right:
29697 out_down = in_down >> amount;
29698 out_down = (in_up << (32 - amount)) | out_down;
29699 if (amount >= 32)
29700 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29701 out_up = (unsigned)in_up >> amount;
29702
29703 The ARM and Thumb2 variants are the same but implemented slightly
29704 differently. If this were only called during expand we could just
29705 use the Thumb2 case and let combine do the right thing, but this
29706 can also be called from post-reload splitters. */
29707
29708 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29709
29710 if (!TARGET_THUMB2)
29711 {
29712 /* Emit code for ARM mode. */
29713 emit_insn (SET (out_down,
29714 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29715 if (code == ASHIFTRT)
29716 {
29717 rtx_code_label *done_label = gen_label_rtx ();
29718 emit_jump_insn (BRANCH (LT, done_label));
29719 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29720 out_down)));
29721 emit_label (done_label);
29722 }
29723 else
29724 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29725 out_down)));
29726 }
29727 else
29728 {
29729 /* Emit code for Thumb2 mode.
29730 Thumb2 can't do shift and or in one insn. */
29731 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29732 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29733
29734 if (code == ASHIFTRT)
29735 {
29736 rtx_code_label *done_label = gen_label_rtx ();
29737 emit_jump_insn (BRANCH (LT, done_label));
29738 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29739 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29740 emit_label (done_label);
29741 }
29742 else
29743 {
29744 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29745 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29746 }
29747 }
29748
29749 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29750 }
29751
29752 #undef SUB_32
29753 #undef RSB_32
29754 #undef SUB_S_32
29755 #undef SET
29756 #undef SHIFT
29757 #undef LSHIFT
29758 #undef REV_LSHIFT
29759 #undef ORR
29760 #undef BRANCH
29761 }
29762
29763 /* Returns true if the pattern is a valid symbolic address, which is either a
29764 symbol_ref or (symbol_ref + addend).
29765
29766 According to the ARM ELF ABI, the initial addend of REL-type relocations
29767 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29768 literal field of the instruction as a 16-bit signed value in the range
29769 -32768 <= A < 32768. */
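/* For example, (symbol_ref "foo") and (const (plus (symbol_ref "foo")
   (const_int 16))) are accepted, while (const (plus (symbol_ref "foo")
   (const_int 65536))) is rejected because the addend does not fit in the
   signed 16-bit range. */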
29770
29771 bool
29772 arm_valid_symbolic_address_p (rtx addr)
29773 {
29774 rtx xop0, xop1 = NULL_RTX;
29775 rtx tmp = addr;
29776
29777 if (target_word_relocations)
29778 return false;
29779
29780 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29781 return true;
29782
29783 /* (const (plus: symbol_ref const_int)) */
29784 if (GET_CODE (addr) == CONST)
29785 tmp = XEXP (addr, 0);
29786
29787 if (GET_CODE (tmp) == PLUS)
29788 {
29789 xop0 = XEXP (tmp, 0);
29790 xop1 = XEXP (tmp, 1);
29791
29792 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29793 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29794 }
29795
29796 return false;
29797 }
29798
29799 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29800 the operands into a form that is valid. */
29801 bool
29802 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29803 {
29804 enum rtx_code code = GET_CODE (*comparison);
29805 int code_int;
29806 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29807 ? GET_MODE (*op2) : GET_MODE (*op1);
29808
29809 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29810
29811 if (code == UNEQ || code == LTGT)
29812 return false;
29813
29814 code_int = (int)code;
29815 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29816 PUT_CODE (*comparison, (enum rtx_code)code_int);
29817
29818 switch (mode)
29819 {
29820 case E_SImode:
29821 if (!arm_add_operand (*op1, mode))
29822 *op1 = force_reg (mode, *op1);
29823 if (!arm_add_operand (*op2, mode))
29824 *op2 = force_reg (mode, *op2);
29825 return true;
29826
29827 case E_DImode:
29828 if (!cmpdi_operand (*op1, mode))
29829 *op1 = force_reg (mode, *op1);
29830 if (!cmpdi_operand (*op2, mode))
29831 *op2 = force_reg (mode, *op2);
29832 return true;
29833
29834 case E_HFmode:
29835 if (!TARGET_VFP_FP16INST)
29836 break;
29837 /* FP16 comparisons are done in SF mode. */
29838 mode = SFmode;
29839 *op1 = convert_to_mode (mode, *op1, 1);
29840 *op2 = convert_to_mode (mode, *op2, 1);
29841 /* Fall through. */
29842 case E_SFmode:
29843 case E_DFmode:
29844 if (!vfp_compare_operand (*op1, mode))
29845 *op1 = force_reg (mode, *op1);
29846 if (!vfp_compare_operand (*op2, mode))
29847 *op2 = force_reg (mode, *op2);
29848 return true;
29849 default:
29850 break;
29851 }
29852
29853 return false;
29854
29855 }
29856
29857 /* Maximum number of instructions to set block of memory. */
29858 static int
29859 arm_block_set_max_insns (void)
29860 {
29861 if (optimize_function_for_size_p (cfun))
29862 return 4;
29863 else
29864 return current_tune->max_insns_inline_memset;
29865 }
29866
29867 /* Return TRUE if it's profitable to set block of memory for
29868 non-vectorized case. VAL is the value to set the memory
29869 with. LENGTH is the number of bytes to set. ALIGN is the
29870 alignment of the destination memory in bytes. UNALIGNED_P
29871 is TRUE if we can only set the memory with instructions
29872 meeting alignment requirements. USE_STRD_P is TRUE if we
29873 can use strd to set the memory. */
29874 static bool
29875 arm_block_set_non_vect_profit_p (rtx val,
29876 unsigned HOST_WIDE_INT length,
29877 unsigned HOST_WIDE_INT align,
29878 bool unaligned_p, bool use_strd_p)
29879 {
29880 int num = 0;
29881 /* For a leftover of 0-7 bytes, we can set the memory block using
29882 strb/strh/str with a minimal number of instructions. */
29883 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
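/* E.g. leftover[5] == 2 because a 5-byte tail can be written with one STR
   plus one STRB, and leftover[7] == 3 (STR + STRH + STRB). */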
29884
29885 if (unaligned_p)
29886 {
29887 num = arm_const_inline_cost (SET, val);
29888 num += length / align + length % align;
29889 }
29890 else if (use_strd_p)
29891 {
29892 num = arm_const_double_inline_cost (val);
29893 num += (length >> 3) + leftover[length & 7];
29894 }
29895 else
29896 {
29897 num = arm_const_inline_cost (SET, val);
29898 num += (length >> 2) + leftover[length & 3];
29899 }
29900
29901 /* We may be able to combine the last STRH/STRB pair into a single STR
29902 by shifting one byte back. */
29903 if (unaligned_access && length > 3 && (length & 3) == 3)
29904 num--;
29905
29906 return (num <= arm_block_set_max_insns ());
29907 }
29908
29909 /* Return TRUE if it's profitable to set block of memory for
29910 vectorized case. LENGTH is the number of bytes to set.
29911 ALIGN is the alignment of destination memory in bytes.
29912 MODE is the vector mode used to set the memory. */
29913 static bool
29914 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29915 unsigned HOST_WIDE_INT align,
29916 machine_mode mode)
29917 {
29918 int num;
29919 bool unaligned_p = ((align & 3) != 0);
29920 unsigned int nelt = GET_MODE_NUNITS (mode);
29921
29922 /* Instruction loading constant value. */
29923 num = 1;
29924 /* Instructions storing the memory. */
29925 num += (length + nelt - 1) / nelt;
29926 /* Instructions adjusting the address expression. We only need to
29927 adjust the address expression if it's 4-byte aligned and the leftover
29928 bytes can only be stored by a misaligned store instruction. */
29929 if (!unaligned_p && (length & 3) != 0)
29930 num++;
29931
29932 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29933 if (!unaligned_p && mode == V16QImode)
29934 num--;
29935
29936 return (num <= arm_block_set_max_insns ());
29937 }
29938
29939 /* Set a block of memory using vectorization instructions for the
29940 unaligned case. We fill the first LENGTH bytes of the memory
29941 area starting from DSTBASE with byte constant VALUE. ALIGN is
29942 the alignment requirement of memory. Return TRUE if succeeded. */
29943 static bool
29944 arm_block_set_unaligned_vect (rtx dstbase,
29945 unsigned HOST_WIDE_INT length,
29946 unsigned HOST_WIDE_INT value,
29947 unsigned HOST_WIDE_INT align)
29948 {
29949 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29950 rtx dst, mem;
29951 rtx val_vec, reg;
29952 rtx (*gen_func) (rtx, rtx);
29953 machine_mode mode;
29954 unsigned HOST_WIDE_INT v = value;
29955 unsigned int offset = 0;
29956 gcc_assert ((align & 0x3) != 0);
29957 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29958 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29959 if (length >= nelt_v16)
29960 {
29961 mode = V16QImode;
29962 gen_func = gen_movmisalignv16qi;
29963 }
29964 else
29965 {
29966 mode = V8QImode;
29967 gen_func = gen_movmisalignv8qi;
29968 }
29969 nelt_mode = GET_MODE_NUNITS (mode);
29970 gcc_assert (length >= nelt_mode);
29971 /* Skip if it isn't profitable. */
29972 if (!arm_block_set_vect_profit_p (length, align, mode))
29973 return false;
29974
29975 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29976 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29977
29978 v = sext_hwi (v, BITS_PER_WORD);
29979
29980 reg = gen_reg_rtx (mode);
29981 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29982 /* Emit instruction loading the constant value. */
29983 emit_move_insn (reg, val_vec);
29984
29985 /* Handle nelt_mode bytes in a vector. */
29986 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29987 {
29988 emit_insn ((*gen_func) (mem, reg));
29989 if (i + 2 * nelt_mode <= length)
29990 {
29991 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29992 offset += nelt_mode;
29993 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29994 }
29995 }
29996
29997 /* If at least nelt_v8 bytes are left over, we must be in
29998 V16QImode. */
29999 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30000
30001 /* Handle (8, 16) bytes leftover. */
30002 if (i + nelt_v8 < length)
30003 {
30004 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30005 offset += length - i;
30006 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30007
30008 /* We are shifting bytes back, set the alignment accordingly. */
30009 if ((length & 1) != 0 && align >= 2)
30010 set_mem_align (mem, BITS_PER_UNIT);
30011
30012 emit_insn (gen_movmisalignv16qi (mem, reg));
30013 }
30014 /* Handle (0, 8] bytes leftover. */
30015 else if (i < length && i + nelt_v8 >= length)
30016 {
30017 if (mode == V16QImode)
30018 reg = gen_lowpart (V8QImode, reg);
30019
30020 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30021 + (nelt_mode - nelt_v8))));
30022 offset += (length - i) + (nelt_mode - nelt_v8);
30023 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30024
30025 /* We are shifting bytes back, set the alignment accordingly. */
30026 if ((length & 1) != 0 && align >= 2)
30027 set_mem_align (mem, BITS_PER_UNIT);
30028
30029 emit_insn (gen_movmisalignv8qi (mem, reg));
30030 }
30031
30032 return true;
30033 }
30034
30035 /* Set a block of memory using vectorization instructions for the
30036 aligned case. We fill the first LENGTH bytes of the memory area
30037 starting from DSTBASE with byte constant VALUE. ALIGN is the
30038 alignment requirement of memory. Return TRUE if succeeded. */
30039 static bool
30040 arm_block_set_aligned_vect (rtx dstbase,
30041 unsigned HOST_WIDE_INT length,
30042 unsigned HOST_WIDE_INT value,
30043 unsigned HOST_WIDE_INT align)
30044 {
30045 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30046 rtx dst, addr, mem;
30047 rtx val_vec, reg;
30048 machine_mode mode;
30049 unsigned HOST_WIDE_INT v = value;
30050 unsigned int offset = 0;
30051
30052 gcc_assert ((align & 0x3) == 0);
30053 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30054 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30055 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30056 mode = V16QImode;
30057 else
30058 mode = V8QImode;
30059
30060 nelt_mode = GET_MODE_NUNITS (mode);
30061 gcc_assert (length >= nelt_mode);
30062 /* Skip if it isn't profitable. */
30063 if (!arm_block_set_vect_profit_p (length, align, mode))
30064 return false;
30065
30066 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30067
30068 v = sext_hwi (v, BITS_PER_WORD);
30069
30070 reg = gen_reg_rtx (mode);
30071 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30072 /* Emit instruction loading the constant value. */
30073 emit_move_insn (reg, val_vec);
30074
30075 i = 0;
30076 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30077 if (mode == V16QImode)
30078 {
30079 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30080 emit_insn (gen_movmisalignv16qi (mem, reg));
30081 i += nelt_mode;
30082 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30083 if (i + nelt_v8 < length && i + nelt_v16 > length)
30084 {
30085 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30086 offset += length - nelt_mode;
30087 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30088 /* We are shifting bytes back, set the alignment accordingly. */
30089 if ((length & 0x3) == 0)
30090 set_mem_align (mem, BITS_PER_UNIT * 4);
30091 else if ((length & 0x1) == 0)
30092 set_mem_align (mem, BITS_PER_UNIT * 2);
30093 else
30094 set_mem_align (mem, BITS_PER_UNIT);
30095
30096 emit_insn (gen_movmisalignv16qi (mem, reg));
30097 return true;
30098 }
30099 /* Fall through for bytes leftover. */
30100 mode = V8QImode;
30101 nelt_mode = GET_MODE_NUNITS (mode);
30102 reg = gen_lowpart (V8QImode, reg);
30103 }
30104
30105 /* Handle 8 bytes in a vector. */
30106 for (; (i + nelt_mode <= length); i += nelt_mode)
30107 {
30108 addr = plus_constant (Pmode, dst, i);
30109 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30110 emit_move_insn (mem, reg);
30111 }
30112
30113 /* Handle single word leftover by shifting 4 bytes back. We can
30114 use aligned access for this case. */
30115 if (i + UNITS_PER_WORD == length)
30116 {
30117 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30118 offset += i - UNITS_PER_WORD;
30119 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30120 /* We are shifting 4 bytes back, set the alignment accordingly. */
30121 if (align > UNITS_PER_WORD)
30122 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30123
30124 emit_move_insn (mem, reg);
30125 }
30126 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30127 We have to use unaligned access for this case. */
30128 else if (i < length)
30129 {
30130 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30131 offset += length - nelt_mode;
30132 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30133 /* We are shifting bytes back, set the alignment accordingly. */
30134 if ((length & 1) == 0)
30135 set_mem_align (mem, BITS_PER_UNIT * 2);
30136 else
30137 set_mem_align (mem, BITS_PER_UNIT);
30138
30139 emit_insn (gen_movmisalignv8qi (mem, reg));
30140 }
30141
30142 return true;
30143 }
30144
30145 /* Set a block of memory using plain strh/strb instructions, using only
30146 instructions allowed by ALIGN on the processor. We fill the
30147 first LENGTH bytes of the memory area starting from DSTBASE
30148 with byte constant VALUE. ALIGN is the alignment requirement
30149 of memory. */
30150 static bool
30151 arm_block_set_unaligned_non_vect (rtx dstbase,
30152 unsigned HOST_WIDE_INT length,
30153 unsigned HOST_WIDE_INT value,
30154 unsigned HOST_WIDE_INT align)
30155 {
30156 unsigned int i;
30157 rtx dst, addr, mem;
30158 rtx val_exp, val_reg, reg;
30159 machine_mode mode;
30160 HOST_WIDE_INT v = value;
30161
30162 gcc_assert (align == 1 || align == 2);
30163
30164 if (align == 2)
30165 v |= (value << BITS_PER_UNIT);
30166
30167 v = sext_hwi (v, BITS_PER_WORD);
30168 val_exp = GEN_INT (v);
30169 /* Skip if it isn't profitable. */
30170 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30171 align, true, false))
30172 return false;
30173
30174 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30175 mode = (align == 2 ? HImode : QImode);
30176 val_reg = force_reg (SImode, val_exp);
30177 reg = gen_lowpart (mode, val_reg);
30178
30179 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30180 {
30181 addr = plus_constant (Pmode, dst, i);
30182 mem = adjust_automodify_address (dstbase, mode, addr, i);
30183 emit_move_insn (mem, reg);
30184 }
30185
30186 /* Handle single byte leftover. */
30187 if (i + 1 == length)
30188 {
30189 reg = gen_lowpart (QImode, val_reg);
30190 addr = plus_constant (Pmode, dst, i);
30191 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30192 emit_move_insn (mem, reg);
30193 i++;
30194 }
30195
30196 gcc_assert (i == length);
30197 return true;
30198 }
30199
30200 /* Set a block of memory using plain strd/str/strh/strb instructions,
30201 to permit unaligned copies on processors which support unaligned
30202 semantics for those instructions. We fill the first LENGTH bytes
30203 of the memory area starting from DSTBASE with byte constant VALUE.
30204 ALIGN is the alignment requirement of memory. */
30205 static bool
30206 arm_block_set_aligned_non_vect (rtx dstbase,
30207 unsigned HOST_WIDE_INT length,
30208 unsigned HOST_WIDE_INT value,
30209 unsigned HOST_WIDE_INT align)
30210 {
30211 unsigned int i;
30212 rtx dst, addr, mem;
30213 rtx val_exp, val_reg, reg;
30214 unsigned HOST_WIDE_INT v;
30215 bool use_strd_p;
30216
30217 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30218 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30219
30220 v = (value | (value << 8) | (value << 16) | (value << 24));
30221 if (length < UNITS_PER_WORD)
30222 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30223
30224 if (use_strd_p)
30225 v |= (v << BITS_PER_WORD);
30226 else
30227 v = sext_hwi (v, BITS_PER_WORD);
30228
30229 val_exp = GEN_INT (v);
30230 /* Skip if it isn't profitable. */
30231 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30232 align, false, use_strd_p))
30233 {
30234 if (!use_strd_p)
30235 return false;
30236
30237 /* Try without strd. */
30238 v = (v >> BITS_PER_WORD);
30239 v = sext_hwi (v, BITS_PER_WORD);
30240 val_exp = GEN_INT (v);
30241 use_strd_p = false;
30242 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30243 align, false, use_strd_p))
30244 return false;
30245 }
30246
30247 i = 0;
30248 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30249 /* Handle double words using strd if possible. */
30250 if (use_strd_p)
30251 {
30252 val_reg = force_reg (DImode, val_exp);
30253 reg = val_reg;
30254 for (; (i + 8 <= length); i += 8)
30255 {
30256 addr = plus_constant (Pmode, dst, i);
30257 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30258 emit_move_insn (mem, reg);
30259 }
30260 }
30261 else
30262 val_reg = force_reg (SImode, val_exp);
30263
30264 /* Handle words. */
30265 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30266 for (; (i + 4 <= length); i += 4)
30267 {
30268 addr = plus_constant (Pmode, dst, i);
30269 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30270 if ((align & 3) == 0)
30271 emit_move_insn (mem, reg);
30272 else
30273 emit_insn (gen_unaligned_storesi (mem, reg));
30274 }
30275
30276 /* Merge last pair of STRH and STRB into a STR if possible. */
30277 if (unaligned_access && i > 0 && (i + 3) == length)
30278 {
30279 addr = plus_constant (Pmode, dst, i - 1);
30280 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30281 /* We are shifting one byte back, set the alignment accordingly. */
30282 if ((align & 1) == 0)
30283 set_mem_align (mem, BITS_PER_UNIT);
30284
30285 /* Most likely this is an unaligned access, and we can't tell at
30286 compilation time. */
30287 emit_insn (gen_unaligned_storesi (mem, reg));
30288 return true;
30289 }
30290
30291 /* Handle half word leftover. */
30292 if (i + 2 <= length)
30293 {
30294 reg = gen_lowpart (HImode, val_reg);
30295 addr = plus_constant (Pmode, dst, i);
30296 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30297 if ((align & 1) == 0)
30298 emit_move_insn (mem, reg);
30299 else
30300 emit_insn (gen_unaligned_storehi (mem, reg));
30301
30302 i += 2;
30303 }
30304
30305 /* Handle single byte leftover. */
30306 if (i + 1 == length)
30307 {
30308 reg = gen_lowpart (QImode, val_reg);
30309 addr = plus_constant (Pmode, dst, i);
30310 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30311 emit_move_insn (mem, reg);
30312 }
30313
30314 return true;
30315 }
30316
30317 /* Set a block of memory using vectorization instructions for both
30318 aligned and unaligned cases. We fill the first LENGTH bytes of
30319 the memory area starting from DSTBASE with byte constant VALUE.
30320 ALIGN is the alignment requirement of memory. */
30321 static bool
30322 arm_block_set_vect (rtx dstbase,
30323 unsigned HOST_WIDE_INT length,
30324 unsigned HOST_WIDE_INT value,
30325 unsigned HOST_WIDE_INT align)
30326 {
30327 /* Check whether we need to use unaligned store instruction. */
30328 if (((align & 3) != 0 || (length & 3) != 0)
30329 /* Check whether unaligned store instruction is available. */
30330 && (!unaligned_access || BYTES_BIG_ENDIAN))
30331 return false;
30332
30333 if ((align & 3) == 0)
30334 return arm_block_set_aligned_vect (dstbase, length, value, align);
30335 else
30336 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30337 }
30338
30339 /* Expand a string store (memset) operation. First we try to do it using
30340 vectorization instructions, then with ARM unaligned access and a
30341 double-word store if profitable. OPERANDS[0] is the destination,
30342 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30343 initialize the memory with, and OPERANDS[3] is the known alignment of
30344 the destination. */
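/* Illustrative example (not the only possible expansion): for a 16-byte
   memset of value 0 on a word-aligned destination, with NEON preferred by
   the tuning and unaligned access available, arm_block_set_vect can emit a
   single vector constant load plus one 16-byte store; otherwise the
   non-vector path falls back to word (and strd) stores plus a small tail. */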
30345 bool
30346 arm_gen_setmem (rtx *operands)
30347 {
30348 rtx dstbase = operands[0];
30349 unsigned HOST_WIDE_INT length;
30350 unsigned HOST_WIDE_INT value;
30351 unsigned HOST_WIDE_INT align;
30352
30353 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30354 return false;
30355
30356 length = UINTVAL (operands[1]);
30357 if (length > 64)
30358 return false;
30359
30360 value = (UINTVAL (operands[2]) & 0xFF);
30361 align = UINTVAL (operands[3]);
30362 if (TARGET_NEON && length >= 8
30363 && current_tune->string_ops_prefer_neon
30364 && arm_block_set_vect (dstbase, length, value, align))
30365 return true;
30366
30367 if (!unaligned_access && (align & 3) != 0)
30368 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30369
30370 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30371 }
30372
30373
30374 static bool
30375 arm_macro_fusion_p (void)
30376 {
30377 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30378 }
30379
30380 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30381 for MOVW / MOVT macro fusion. */
30382
30383 static bool
30384 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30385 {
30386 /* We are trying to fuse
30387 movw imm / movt imm
30388 instructions as a group that gets scheduled together. */
30389
30390 rtx set_dest = SET_DEST (curr_set);
30391
30392 if (GET_MODE (set_dest) != SImode)
30393 return false;
30394
30395 /* We are trying to match:
30396 prev (movw) == (set (reg r0) (const_int imm16))
30397 curr (movt) == (set (zero_extract (reg r0)
30398 (const_int 16)
30399 (const_int 16))
30400 (const_int imm16_1))
30401 or
30402 prev (movw) == (set (reg r1)
30403 (high (symbol_ref ("SYM"))))
30404 curr (movt) == (set (reg r0)
30405 (lo_sum (reg r1)
30406 (symbol_ref ("SYM")))) */
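/* I.e. the fusible pair corresponds to assembly along the lines of
   (illustrative operands):
	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM
   or a movw/movt pair building a 32-bit immediate in the same register. */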
30407
30408 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30409 {
30410 if (CONST_INT_P (SET_SRC (curr_set))
30411 && CONST_INT_P (SET_SRC (prev_set))
30412 && REG_P (XEXP (set_dest, 0))
30413 && REG_P (SET_DEST (prev_set))
30414 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30415 return true;
30416
30417 }
30418 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30419 && REG_P (SET_DEST (curr_set))
30420 && REG_P (SET_DEST (prev_set))
30421 && GET_CODE (SET_SRC (prev_set)) == HIGH
30422 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30423 return true;
30424
30425 return false;
30426 }
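/* Hedged example of the pair being matched: loading the constant
   0x12345678 into r0 is emitted as

     movw  r0, #0x5678   @ (set (reg r0) (const_int 0x5678))
     movt  r0, #0x1234   @ (set (zero_extract (reg r0) (const_int 16)
                         @                    (const_int 16))
                         @      (const_int 0x1234))

   which satisfies the first pattern above, so the two instructions can be
   kept back to back on cores that fuse MOVW/MOVT.  */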
30427
30428 static bool
30429 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30430 {
30431 rtx prev_set = single_set (prev);
30432 rtx curr_set = single_set (curr);
30433
30434 if (!prev_set
30435 || !curr_set)
30436 return false;
30437
30438 if (any_condjump_p (curr))
30439 return false;
30440
30441 if (!arm_macro_fusion_p ())
30442 return false;
30443
30444 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30445 && aarch_crypto_can_dual_issue (prev, curr))
30446 return true;
30447
30448 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30449 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30450 return true;
30451
30452 return false;
30453 }
30454
30455 /* Return true iff the instruction fusion described by OP is enabled. */
30456 bool
30457 arm_fusion_enabled_p (tune_params::fuse_ops op)
30458 {
30459 return current_tune->fusible_ops & op;
30460 }
30461
30462 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30463 scheduled for speculative execution. Reject the long-running division
30464 and square-root instructions. */
30465
30466 static bool
30467 arm_sched_can_speculate_insn (rtx_insn *insn)
30468 {
30469 switch (get_attr_type (insn))
30470 {
30471 case TYPE_SDIV:
30472 case TYPE_UDIV:
30473 case TYPE_FDIVS:
30474 case TYPE_FDIVD:
30475 case TYPE_FSQRTS:
30476 case TYPE_FSQRTD:
30477 case TYPE_NEON_FP_SQRT_S:
30478 case TYPE_NEON_FP_SQRT_D:
30479 case TYPE_NEON_FP_SQRT_S_Q:
30480 case TYPE_NEON_FP_SQRT_D_Q:
30481 case TYPE_NEON_FP_DIV_S:
30482 case TYPE_NEON_FP_DIV_D:
30483 case TYPE_NEON_FP_DIV_S_Q:
30484 case TYPE_NEON_FP_DIV_D_Q:
30485 return false;
30486 default:
30487 return true;
30488 }
30489 }
30490
30491 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30492
30493 static unsigned HOST_WIDE_INT
30494 arm_asan_shadow_offset (void)
30495 {
30496 return HOST_WIDE_INT_1U << 29;
30497 }
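/* Worked example (assuming the generic ASan mapping
   shadow = (addr >> 3) + offset): with this offset of 1 << 29
   (0x20000000), the shadow byte for address 0x40000000 lives at
   (0x40000000 >> 3) + 0x20000000 == 0x28000000.  */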
30498
30499
30500 /* This is a temporary fix for PR60655. Ideally we need
30501 to handle most of these cases in the generic part but
30502 currently we reject minus (..) (sym_ref). We try to
30503 ameliorate the case with minus (sym_ref1) (sym_ref2)
30504 where they are in the same section. */
30505
30506 static bool
30507 arm_const_not_ok_for_debug_p (rtx p)
30508 {
30509 tree decl_op0 = NULL;
30510 tree decl_op1 = NULL;
30511
30512 if (GET_CODE (p) == UNSPEC)
30513 return true;
30514 if (GET_CODE (p) == MINUS)
30515 {
30516 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30517 {
30518 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30519 if (decl_op1
30520 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30521 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30522 {
30523 if ((VAR_P (decl_op1)
30524 || TREE_CODE (decl_op1) == CONST_DECL)
30525 && (VAR_P (decl_op0)
30526 || TREE_CODE (decl_op0) == CONST_DECL))
30527 return (get_variable_section (decl_op1, false)
30528 != get_variable_section (decl_op0, false));
30529
30530 if (TREE_CODE (decl_op1) == LABEL_DECL
30531 && TREE_CODE (decl_op0) == LABEL_DECL)
30532 return (DECL_CONTEXT (decl_op1)
30533 != DECL_CONTEXT (decl_op0));
30534 }
30535
30536 return true;
30537 }
30538 }
30539
30540 return false;
30541 }
30542
30543 /* Return TRUE if X is a reference to a value in a constant pool.  */
30544 extern bool
30545 arm_is_constant_pool_ref (rtx x)
30546 {
30547 return (MEM_P (x)
30548 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30549 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30550 }
30551
30552 /* Remember the last target of arm_set_current_function. */
30553 static GTY(()) tree arm_previous_fndecl;
30554
30555 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30556
30557 void
30558 save_restore_target_globals (tree new_tree)
30559 {
30560 /* If we have a previous state, use it. */
30561 if (TREE_TARGET_GLOBALS (new_tree))
30562 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30563 else if (new_tree == target_option_default_node)
30564 restore_target_globals (&default_target_globals);
30565 else
30566 {
30567 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30568 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30569 }
30570
30571 arm_option_params_internal ();
30572 }
30573
30574 /* Invalidate arm_previous_fndecl. */
30575
30576 void
30577 arm_reset_previous_fndecl (void)
30578 {
30579 arm_previous_fndecl = NULL_TREE;
30580 }
30581
30582 /* Establish appropriate back-end context for processing the function
30583 FNDECL. The argument might be NULL to indicate processing at top
30584 level, outside of any function scope. */
30585
30586 static void
30587 arm_set_current_function (tree fndecl)
30588 {
30589 if (!fndecl || fndecl == arm_previous_fndecl)
30590 return;
30591
30592 tree old_tree = (arm_previous_fndecl
30593 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30594 : NULL_TREE);
30595
30596 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30597
30598 /* If the current function has no attributes but the previous one did,
30599 use the default node.  */
30600 if (! new_tree && old_tree)
30601 new_tree = target_option_default_node;
30602
30603 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30604 the default have been handled by save_restore_target_globals from
30605 arm_pragma_target_parse. */
30606 if (old_tree == new_tree)
30607 return;
30608
30609 arm_previous_fndecl = fndecl;
30610
30611 /* First set the target options. */
30612 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30613
30614 save_restore_target_globals (new_tree);
30615 }
30616
30617 /* Implement TARGET_OPTION_PRINT. */
30618
30619 static void
30620 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30621 {
30622 int flags = ptr->x_target_flags;
30623 const char *fpu_name;
30624
30625 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30626 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30627
30628 fprintf (file, "%*sselected isa %s\n", indent, "",
30629 TARGET_THUMB2_P (flags) ? "thumb2" :
30630 TARGET_THUMB_P (flags) ? "thumb1" :
30631 "arm");
30632
30633 if (ptr->x_arm_arch_string)
30634 fprintf (file, "%*sselected architecture %s\n", indent, "",
30635 ptr->x_arm_arch_string);
30636
30637 if (ptr->x_arm_cpu_string)
30638 fprintf (file, "%*sselected CPU %s\n", indent, "",
30639 ptr->x_arm_cpu_string);
30640
30641 if (ptr->x_arm_tune_string)
30642 fprintf (file, "%*sselected tune %s\n", indent, "",
30643 ptr->x_arm_tune_string);
30644
30645 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30646 }
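/* Illustrative output of the fprintf calls above (values are made up):

     selected isa thumb2
     selected architecture armv7-a
     selected CPU cortex-a9
     selected tune cortex-a9
     selected fpu neon

   The architecture, CPU and tune lines are only printed when the
   corresponding option string is set.  */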
30647
30648 /* Hook to determine if one function can safely inline another. */
30649
30650 static bool
30651 arm_can_inline_p (tree caller, tree callee)
30652 {
30653 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30654 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30655 bool can_inline = true;
30656
30657 struct cl_target_option *caller_opts
30658 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30659 : target_option_default_node);
30660
30661 struct cl_target_option *callee_opts
30662 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30663 : target_option_default_node);
30664
30665 if (callee_opts == caller_opts)
30666 return true;
30667
30668 /* Callee's ISA features should be a subset of the caller's. */
30669 struct arm_build_target caller_target;
30670 struct arm_build_target callee_target;
30671 caller_target.isa = sbitmap_alloc (isa_num_bits);
30672 callee_target.isa = sbitmap_alloc (isa_num_bits);
30673
30674 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30675 false);
30676 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30677 false);
30678 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30679 can_inline = false;
30680
30681 sbitmap_free (caller_target.isa);
30682 sbitmap_free (callee_target.isa);
30683
30684 /* OK to inline between different modes.
30685 Functions with mode-specific instructions, e.g. those using asm,
30686 must be explicitly protected with noinline.  */
30687 return can_inline;
30688 }
30689
30690 /* Hook to fix function's alignment affected by target attribute. */
30691
30692 static void
30693 arm_relayout_function (tree fndecl)
30694 {
30695 if (DECL_USER_ALIGN (fndecl))
30696 return;
30697
30698 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30699
30700 if (!callee_tree)
30701 callee_tree = target_option_default_node;
30702
30703 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30704 SET_DECL_ALIGN
30705 (fndecl,
30706 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30707 }
30708
30709 /* Inner function to process the attribute((target(...))); take an argument
30710 and set the current options from that argument.  If we have a list,
30711 recursively go over the list.  */
30712
30713 static bool
30714 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30715 {
30716 if (TREE_CODE (args) == TREE_LIST)
30717 {
30718 bool ret = true;
30719
30720 for (; args; args = TREE_CHAIN (args))
30721 if (TREE_VALUE (args)
30722 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30723 ret = false;
30724 return ret;
30725 }
30726
30727 else if (TREE_CODE (args) != STRING_CST)
30728 {
30729 error ("attribute %<target%> argument not a string");
30730 return false;
30731 }
30732
30733 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30734 char *q;
30735
30736 while ((q = strtok (argstr, ",")) != NULL)
30737 {
30738 while (ISSPACE (*q)) ++q;
30739
30740 argstr = NULL;
30741 if (!strncmp (q, "thumb", 5))
30742 opts->x_target_flags |= MASK_THUMB;
30743
30744 else if (!strncmp (q, "arm", 3))
30745 opts->x_target_flags &= ~MASK_THUMB;
30746
30747 else if (!strncmp (q, "fpu=", 4))
30748 {
30749 int fpu_index;
30750 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30751 &fpu_index, CL_TARGET))
30752 {
30753 error ("invalid fpu for target attribute or pragma %qs", q);
30754 return false;
30755 }
30756 if (fpu_index == TARGET_FPU_auto)
30757 {
30758 /* This doesn't really make sense until we support
30759 general dynamic selection of the architecture and all
30760 sub-features. */
30761 sorry ("auto fpu selection not currently permitted here");
30762 return false;
30763 }
30764 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30765 }
30766 else if (!strncmp (q, "arch=", 5))
30767 {
30768 char* arch = q+5;
30769 const arch_option *arm_selected_arch
30770 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30771
30772 if (!arm_selected_arch)
30773 {
30774 error ("invalid architecture for target attribute or pragma %qs",
30775 q);
30776 return false;
30777 }
30778
30779 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30780 }
30781 else if (q[0] == '+')
30782 {
30783 opts->x_arm_arch_string
30784 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30785 }
30786 else
30787 {
30788 error ("unknown target attribute or pragma %qs", q);
30789 return false;
30790 }
30791 }
30792
30793 return true;
30794 }
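/* Hedged usage examples (illustrative only) of strings accepted by the
   parser above; tokens are comma-separated:

     __attribute__ ((target ("thumb,fpu=neon")))
     __attribute__ ((target ("arch=armv7-a")))

   "thumb"/"arm" flip MASK_THUMB, "fpu=" selects a named FPU, "arch="
   selects an architecture, and a token starting with '+' (e.g. "+crc")
   is appended to the current architecture string.  */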
30795
30796 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30797
30798 tree
30799 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30800 struct gcc_options *opts_set)
30801 {
30802 struct cl_target_option cl_opts;
30803
30804 if (!arm_valid_target_attribute_rec (args, opts))
30805 return NULL_TREE;
30806
30807 cl_target_option_save (&cl_opts, opts);
30808 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30809 arm_option_check_internal (opts);
30810 /* Do any overrides, such as global options arch=xxx.
30811 We do this since arm_active_target was overridden. */
30812 arm_option_reconfigure_globals ();
30813 arm_options_perform_arch_sanity_checks ();
30814 arm_option_override_internal (opts, opts_set);
30815
30816 return build_target_option_node (opts);
30817 }
30818
30819 static void
30820 add_attribute (const char * mode, tree *attributes)
30821 {
30822 size_t len = strlen (mode);
30823 tree value = build_string (len, mode);
30824
30825 TREE_TYPE (value) = build_array_type (char_type_node,
30826 build_index_type (size_int (len)));
30827
30828 *attributes = tree_cons (get_identifier ("target"),
30829 build_tree_list (NULL_TREE, value),
30830 *attributes);
30831 }
30832
30833 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30834
30835 static void
30836 arm_insert_attributes (tree fndecl, tree * attributes)
30837 {
30838 const char *mode;
30839
30840 if (! TARGET_FLIP_THUMB)
30841 return;
30842
30843 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30844 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30845 return;
30846
30847 /* Nested definitions must inherit mode. */
30848 if (current_function_decl)
30849 {
30850 mode = TARGET_THUMB ? "thumb" : "arm";
30851 add_attribute (mode, attributes);
30852 return;
30853 }
30854
30855 /* If there is already a setting don't change it. */
30856 if (lookup_attribute ("target", *attributes) != NULL)
30857 return;
30858
30859 mode = thumb_flipper ? "thumb" : "arm";
30860 add_attribute (mode, attributes);
30861
30862 thumb_flipper = !thumb_flipper;
30863 }
30864
30865 /* Hook to validate attribute((target("string"))). */
30866
30867 static bool
30868 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30869 tree args, int ARG_UNUSED (flags))
30870 {
30871 bool ret = true;
30872 struct gcc_options func_options;
30873 tree cur_tree, new_optimize;
30874 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30875
30876 /* Get the optimization options of the current function. */
30877 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30878
30879 /* If the function changed the optimization levels as well as setting target
30880 options, start with the optimizations specified. */
30881 if (!func_optimize)
30882 func_optimize = optimization_default_node;
30883
30884 /* Init func_options. */
30885 memset (&func_options, 0, sizeof (func_options));
30886 init_options_struct (&func_options, NULL);
30887 lang_hooks.init_options_struct (&func_options);
30888
30889 /* Initialize func_options to the defaults. */
30890 cl_optimization_restore (&func_options,
30891 TREE_OPTIMIZATION (func_optimize));
30892
30893 cl_target_option_restore (&func_options,
30894 TREE_TARGET_OPTION (target_option_default_node));
30895
30896 /* Set func_options flags with new target mode. */
30897 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30898 &global_options_set);
30899
30900 if (cur_tree == NULL_TREE)
30901 ret = false;
30902
30903 new_optimize = build_optimization_node (&func_options);
30904
30905 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30906
30907 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30908
30909 finalize_options_struct (&func_options);
30910
30911 return ret;
30912 }
30913
30914 /* Match an ISA feature bitmap to a named FPU. We always use the
30915 first entry that exactly matches the feature set, so that we
30916 effectively canonicalize the FPU name for the assembler. */
30917 static const char*
30918 arm_identify_fpu_from_isa (sbitmap isa)
30919 {
30920 auto_sbitmap fpubits (isa_num_bits);
30921 auto_sbitmap cand_fpubits (isa_num_bits);
30922
30923 bitmap_and (fpubits, isa, isa_all_fpubits);
30924
30925 /* If there are no ISA feature bits relating to the FPU, we must be
30926 doing soft-float. */
30927 if (bitmap_empty_p (fpubits))
30928 return "softvfp";
30929
30930 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30931 {
30932 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30933 if (bitmap_equal_p (fpubits, cand_fpubits))
30934 return all_fpus[i].name;
30935 }
30936 /* We must find an entry, or things have gone wrong. */
30937 gcc_unreachable ();
30938 }
30939
30940 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30941 by the function fndecl. */
30942 void
30943 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30944 {
30945 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30946
30947 struct cl_target_option *targ_options;
30948 if (target_parts)
30949 targ_options = TREE_TARGET_OPTION (target_parts);
30950 else
30951 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30952 gcc_assert (targ_options);
30953
30954 /* Only update the assembler .arch string if it is distinct from the last
30955 such string we printed.  arch_to_print is set conditionally in case
30956 targ_options->x_arm_arch_string is NULL, which can be the case
30957 when cc1 is invoked directly without passing the -march option.  */
30958 std::string arch_to_print;
30959 if (targ_options->x_arm_arch_string)
30960 arch_to_print = targ_options->x_arm_arch_string;
30961
30962 if (arch_to_print != arm_last_printed_arch_string)
30963 {
30964 std::string arch_name
30965 = arch_to_print.substr (0, arch_to_print.find ("+"));
30966 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30967 const arch_option *arch
30968 = arm_parse_arch_option_name (all_architectures, "-march",
30969 targ_options->x_arm_arch_string);
30970 auto_sbitmap opt_bits (isa_num_bits);
30971
30972 gcc_assert (arch);
30973 if (arch->common.extensions)
30974 {
30975 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30976 opt->name != NULL;
30977 opt++)
30978 {
30979 if (!opt->remove)
30980 {
30981 arm_initialize_isa (opt_bits, opt->isa_bits);
30982 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30983 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30984 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30985 opt->name);
30986 }
30987 }
30988 }
30989
30990 arm_last_printed_arch_string = arch_to_print;
30991 }
30992
30993 fprintf (stream, "\t.syntax unified\n");
30994
30995 if (TARGET_THUMB)
30996 {
30997 if (is_called_in_ARM_mode (decl)
30998 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30999 && cfun->is_thunk))
31000 fprintf (stream, "\t.code 32\n");
31001 else if (TARGET_THUMB1)
31002 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31003 else
31004 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31005 }
31006 else
31007 fprintf (stream, "\t.arm\n");
31008
31009 std::string fpu_to_print
31010 = TARGET_SOFT_FLOAT
31011 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31012
31013 if (fpu_to_print != arm_last_printed_fpu_string)
31014 {
31015 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31016 arm_last_printed_fpu_string = fpu_to_print;
31017 }
31018
31019 if (TARGET_POKE_FUNCTION_NAME)
31020 arm_poke_function_name (stream, (const char *) name);
31021 }
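/* Illustrative output (directives depend on the active target): for a
   Thumb-2 function on an armv7-a NEON target this hook emits something
   like

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu neon

   together with .arch_extension lines (printed right after .arch) for any
   enabled extensions; the .arch and .fpu directives are skipped when they
   match the last ones printed.  */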
31022
31023 /* If MEM is in the form of [base+offset], extract the two parts
31024 of the address and store them in BASE and OFFSET; otherwise return false
31025 after clearing BASE and OFFSET.  */
31026
31027 static bool
31028 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31029 {
31030 rtx addr;
31031
31032 gcc_assert (MEM_P (mem));
31033
31034 addr = XEXP (mem, 0);
31035
31036 /* Strip off const from addresses like (const (addr)). */
31037 if (GET_CODE (addr) == CONST)
31038 addr = XEXP (addr, 0);
31039
31040 if (GET_CODE (addr) == REG)
31041 {
31042 *base = addr;
31043 *offset = const0_rtx;
31044 return true;
31045 }
31046
31047 if (GET_CODE (addr) == PLUS
31048 && GET_CODE (XEXP (addr, 0)) == REG
31049 && CONST_INT_P (XEXP (addr, 1)))
31050 {
31051 *base = XEXP (addr, 0);
31052 *offset = XEXP (addr, 1);
31053 return true;
31054 }
31055
31056 *base = NULL_RTX;
31057 *offset = NULL_RTX;
31058
31059 return false;
31060 }
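/* Examples of the address forms accepted above (illustrative RTL):

     (mem (reg r1))                       -> BASE = r1, OFFSET = 0
     (mem (plus (reg r1) (const_int 8)))  -> BASE = r1, OFFSET = 8

   Anything else, e.g. a post-increment address, clears BASE and OFFSET
   and returns false.  */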
31061
31062 /* If INSN is a load or store whose address is in the form [base+offset],
31063 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31064 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31065 otherwise return FALSE.  */
31066
31067 static bool
31068 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31069 {
31070 rtx x, dest, src;
31071
31072 gcc_assert (INSN_P (insn));
31073 x = PATTERN (insn);
31074 if (GET_CODE (x) != SET)
31075 return false;
31076
31077 src = SET_SRC (x);
31078 dest = SET_DEST (x);
31079 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31080 {
31081 *is_load = false;
31082 extract_base_offset_in_addr (dest, base, offset);
31083 }
31084 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31085 {
31086 *is_load = true;
31087 extract_base_offset_in_addr (src, base, offset);
31088 }
31089 else
31090 return false;
31091
31092 return (*base != NULL_RTX && *offset != NULL_RTX);
31093 }
31094
31095 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31096
31097 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31098 and PRI are only calculated for these instructions.  For other instructions,
31099 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
31100 of instruction fusion can be supported by returning different priorities.
31101
31102 It is important that irrelevant instructions get the largest FUSION_PRI.  */
31103
31104 static void
31105 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31106 int *fusion_pri, int *pri)
31107 {
31108 int tmp, off_val;
31109 bool is_load;
31110 rtx base, offset;
31111
31112 gcc_assert (INSN_P (insn));
31113
31114 tmp = max_pri - 1;
31115 if (!fusion_load_store (insn, &base, &offset, &is_load))
31116 {
31117 *pri = tmp;
31118 *fusion_pri = tmp;
31119 return;
31120 }
31121
31122 /* Load goes first. */
31123 if (is_load)
31124 *fusion_pri = tmp - 1;
31125 else
31126 *fusion_pri = tmp - 2;
31127
31128 tmp /= 2;
31129
31130 /* INSN with smaller base register goes first. */
31131 tmp -= ((REGNO (base) & 0xff) << 20);
31132
31133 /* INSN with smaller offset goes first. */
31134 off_val = (int)(INTVAL (offset));
31135 if (off_val >= 0)
31136 tmp -= (off_val & 0xfffff);
31137 else
31138 tmp += ((- off_val) & 0xfffff);
31139
31140 *pri = tmp;
31141 return;
31142 }
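/* Worked example (MAX_PRI is whatever the scheduler passes in): for a
   load  ldr rX, [r1, #8]  the code above computes

     FUSION_PRI = MAX_PRI - 2                                (loads first)
     PRI        = (MAX_PRI - 1) / 2
                  - ((REGNO (r1) & 0xff) << 20) - 8

   so loads and stores sharing a base register sort next to each other in
   increasing offset order.  */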
31143
31144
31145 /* Construct and return a PARALLEL RTX vector with elements numbering the
31146 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31147 the vector - from the perspective of the architecture. This does not
31148 line up with GCC's perspective on lane numbers, so we end up with
31149 different masks depending on our target endian-ness. The diagram
31150 below may help. We must draw the distinction when building masks
31151 which select one half of the vector. An instruction selecting
31152 architectural low-lanes for a big-endian target must be described using
31153 a mask selecting GCC high-lanes.
31154
31155 Big-Endian Little-Endian
31156
31157 GCC 0 1 2 3 3 2 1 0
31158 | x | x | x | x | | x | x | x | x |
31159 Architecture 3 2 1 0 3 2 1 0
31160
31161 Low Mask: { 2, 3 } { 0, 1 }
31162 High Mask: { 0, 1 } { 2, 3 }
31163 */
31164
31165 rtx
31166 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31167 {
31168 int nunits = GET_MODE_NUNITS (mode);
31169 rtvec v = rtvec_alloc (nunits / 2);
31170 int high_base = nunits / 2;
31171 int low_base = 0;
31172 int base;
31173 rtx t1;
31174 int i;
31175
31176 if (BYTES_BIG_ENDIAN)
31177 base = high ? low_base : high_base;
31178 else
31179 base = high ? high_base : low_base;
31180
31181 for (i = 0; i < nunits / 2; i++)
31182 RTVEC_ELT (v, i) = GEN_INT (base + i);
31183
31184 t1 = gen_rtx_PARALLEL (mode, v);
31185 return t1;
31186 }
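/* Concrete example: for V4SImode with HIGH == true this returns

     (parallel [(const_int 2) (const_int 3)])   on little-endian
     (parallel [(const_int 0) (const_int 1)])   on big-endian

   matching the "High Mask" row of the diagram above.  */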
31187
31188 /* Check OP for validity as a PARALLEL RTX vector with elements
31189 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31190 from the perspective of the architecture. See the diagram above
31191 arm_simd_vect_par_cnst_half for more details.  */
31192
31193 bool
31194 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31195 bool high)
31196 {
31197 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31198 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31199 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31200 int i = 0;
31201
31202 if (!VECTOR_MODE_P (mode))
31203 return false;
31204
31205 if (count_op != count_ideal)
31206 return false;
31207
31208 for (i = 0; i < count_ideal; i++)
31209 {
31210 rtx elt_op = XVECEXP (op, 0, i);
31211 rtx elt_ideal = XVECEXP (ideal, 0, i);
31212
31213 if (!CONST_INT_P (elt_op)
31214 || INTVAL (elt_ideal) != INTVAL (elt_op))
31215 return false;
31216 }
31217 return true;
31218 }
31219
31220 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
31221 in Thumb1.  */
31222 static bool
31223 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31224 const_tree)
31225 {
31226 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31227 if (vcall_offset && TARGET_THUMB1)
31228 return false;
31229
31230 /* Otherwise ok. */
31231 return true;
31232 }
31233
31234 /* Generate RTL for a conditional branch with rtx comparison CODE in
31235 mode CC_MODE. The destination of the unlikely conditional branch
31236 is LABEL_REF. */
31237
31238 void
31239 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31240 rtx label_ref)
31241 {
31242 rtx x;
31243 x = gen_rtx_fmt_ee (code, VOIDmode,
31244 gen_rtx_REG (cc_mode, CC_REGNUM),
31245 const0_rtx);
31246
31247 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31248 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31249 pc_rtx);
31250 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31251 }
31252
31253 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31254
31255 For pure-code sections there is no letter code for this attribute, so
31256 output all the section flags numerically when this is needed. */
31257
31258 static bool
31259 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31260 {
31261
31262 if (flags & SECTION_ARM_PURECODE)
31263 {
31264 *num = 0x20000000;
31265
31266 if (!(flags & SECTION_DEBUG))
31267 *num |= 0x2;
31268 if (flags & SECTION_EXCLUDE)
31269 *num |= 0x80000000;
31270 if (flags & SECTION_WRITE)
31271 *num |= 0x1;
31272 if (flags & SECTION_CODE)
31273 *num |= 0x4;
31274 if (flags & SECTION_MERGE)
31275 *num |= 0x10;
31276 if (flags & SECTION_STRINGS)
31277 *num |= 0x20;
31278 if (flags & SECTION_TLS)
31279 *num |= 0x400;
31280 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31281 *num |= 0x200;
31282
31283 return true;
31284 }
31285
31286 return false;
31287 }
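/* Worked example: a pure-code text section with SECTION_CODE set and the
   write, debug, merge, TLS and linkonce flags clear gets

     0x20000000 | 0x2 | 0x4 == 0x20000006

   i.e. SHF_ARM_PURECODE | SHF_ALLOC | SHF_EXECINSTR.  */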
31288
31289 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31290
31291 If pure-code is passed as an option, make sure all functions are in
31292 sections that have the SHF_ARM_PURECODE attribute. */
31293
31294 static section *
31295 arm_function_section (tree decl, enum node_frequency freq,
31296 bool startup, bool exit)
31297 {
31298 const char * section_name;
31299 section * sec;
31300
31301 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31302 return default_function_section (decl, freq, startup, exit);
31303
31304 if (!target_pure_code)
31305 return default_function_section (decl, freq, startup, exit);
31306
31307
31308 section_name = DECL_SECTION_NAME (decl);
31309
31310 /* If a function is not in a named section then it falls under the 'default'
31311 text section, also known as '.text'. We can preserve previous behavior as
31312 the default text section already has the SHF_ARM_PURECODE section
31313 attribute. */
31314 if (!section_name)
31315 {
31316 section *default_sec = default_function_section (decl, freq, startup,
31317 exit);
31318
31319 /* If default_sec is not null, then it must be a special section like for
31320 example .text.startup. We set the pure-code attribute and return the
31321 same section to preserve existing behavior. */
31322 if (default_sec)
31323 default_sec->common.flags |= SECTION_ARM_PURECODE;
31324 return default_sec;
31325 }
31326
31327 /* Otherwise look whether a section has already been created with
31328 'section_name'. */
31329 sec = get_named_section (decl, section_name, 0);
31330 if (!sec)
31331 /* If that is not the case passing NULL as the section's name to
31332 'get_named_section' will create a section with the declaration's
31333 section name. */
31334 sec = get_named_section (decl, NULL, 0);
31335
31336 /* Set the SHF_ARM_PURECODE attribute. */
31337 sec->common.flags |= SECTION_ARM_PURECODE;
31338
31339 return sec;
31340 }
31341
31342 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31343
31344 If DECL is a function declaration and pure-code is passed as an option
31345 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31346 section's name and RELOC indicates whether the declaration's initializer may
31347 contain runtime relocations.  */
31348
31349 static unsigned int
31350 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31351 {
31352 unsigned int flags = default_section_type_flags (decl, name, reloc);
31353
31354 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31355 flags |= SECTION_ARM_PURECODE;
31356
31357 return flags;
31358 }
31359
31360 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31361
31362 static void
31363 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31364 rtx op0, rtx op1,
31365 rtx *quot_p, rtx *rem_p)
31366 {
31367 if (mode == SImode)
31368 gcc_assert (!TARGET_IDIV);
31369
31370 scalar_int_mode libval_mode
31371 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31372
31373 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31374 libval_mode,
31375 op0, GET_MODE (op0),
31376 op1, GET_MODE (op1));
31377
31378 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31379 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31380 GET_MODE_SIZE (mode));
31381
31382 gcc_assert (quotient);
31383 gcc_assert (remainder);
31384
31385 *quot_p = quotient;
31386 *rem_p = remainder;
31387 }
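/* Illustrative SImode call (hedged; names follow the AEABI runtime):
   LIBVAL_MODE becomes DImode and LIBFUNC is e.g. __aeabi_idivmod, whose
   quotient/remainder pair comes back packed in a double-width value; the
   two simplify_gen_subreg calls above split it at byte offsets 0 and
   GET_MODE_SIZE (SImode) == 4.  */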
31388
31389 /* This function checks for the availability of the coprocessor builtin passed
31390 in BUILTIN for the current target. Returns true if it is available and
31391 false otherwise.  If a BUILTIN is passed for which this function has not
31392 been implemented, it will cause an internal compiler error.  */
31393
31394 bool
31395 arm_coproc_builtin_available (enum unspecv builtin)
31396 {
31397 /* None of these builtins are available in Thumb mode if the target only
31398 supports Thumb-1. */
31399 if (TARGET_THUMB1)
31400 return false;
31401
31402 switch (builtin)
31403 {
31404 case VUNSPEC_CDP:
31405 case VUNSPEC_LDC:
31406 case VUNSPEC_LDCL:
31407 case VUNSPEC_STC:
31408 case VUNSPEC_STCL:
31409 case VUNSPEC_MCR:
31410 case VUNSPEC_MRC:
31411 if (arm_arch4)
31412 return true;
31413 break;
31414 case VUNSPEC_CDP2:
31415 case VUNSPEC_LDC2:
31416 case VUNSPEC_LDC2L:
31417 case VUNSPEC_STC2:
31418 case VUNSPEC_STC2L:
31419 case VUNSPEC_MCR2:
31420 case VUNSPEC_MRC2:
31421 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31422 ARMv8-{A,M}. */
31423 if (arm_arch5t)
31424 return true;
31425 break;
31426 case VUNSPEC_MCRR:
31427 case VUNSPEC_MRRC:
31428 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31429 ARMv8-{A,M}. */
31430 if (arm_arch6 || arm_arch5te)
31431 return true;
31432 break;
31433 case VUNSPEC_MCRR2:
31434 case VUNSPEC_MRRC2:
31435 if (arm_arch6)
31436 return true;
31437 break;
31438 default:
31439 gcc_unreachable ();
31440 }
31441 return false;
31442 }
31443
31444 /* This function returns true if OP is a valid memory operand for the ldc and
31445 stc coprocessor instructions and false otherwise. */
31446
31447 bool
31448 arm_coproc_ldc_stc_legitimate_address (rtx op)
31449 {
31450 HOST_WIDE_INT range;
31451 /* Has to be a memory operand. */
31452 if (!MEM_P (op))
31453 return false;
31454
31455 op = XEXP (op, 0);
31456
31457 /* We accept registers. */
31458 if (REG_P (op))
31459 return true;
31460
31461 switch (GET_CODE (op))
31462 {
31463 case PLUS:
31464 {
31465 /* Or registers with an offset. */
31466 if (!REG_P (XEXP (op, 0)))
31467 return false;
31468
31469 op = XEXP (op, 1);
31470
31471 /* The offset must be an immediate though. */
31472 if (!CONST_INT_P (op))
31473 return false;
31474
31475 range = INTVAL (op);
31476
31477 /* Within the range of [-1020,1020]. */
31478 if (!IN_RANGE (range, -1020, 1020))
31479 return false;
31480
31481 /* And a multiple of 4. */
31482 return (range % 4) == 0;
31483 }
31484 case PRE_INC:
31485 case POST_INC:
31486 case PRE_DEC:
31487 case POST_DEC:
31488 return REG_P (XEXP (op, 0));
31489 default:
31490 gcc_unreachable ();
31491 }
31492 return false;
31493 }
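/* Illustrative addresses: [r0], [r1, #8] and [r2, #-1020] are accepted,
   as are the pre/post-modify forms with a register base; [r3, #2] is
   rejected (not a multiple of 4) and [r3, #1024] is rejected (outside
   the range [-1020, 1020]).  */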
31494
31495 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31496
31497 In VFPv1, VFP registers could only be accessed in the mode they were
31498 set, so subregs would be invalid there. However, we don't support
31499 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31500
31501 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31502 VFP registers in little-endian order. We can't describe that accurately to
31503 GCC, so avoid taking subregs of such values.
31504
31505 The only exception is going from a 128-bit to a 64-bit type. In that
31506 case the data layout happens to be consistent for big-endian, so we
31507 explicitly allow that case. */
31508
31509 static bool
31510 arm_can_change_mode_class (machine_mode from, machine_mode to,
31511 reg_class_t rclass)
31512 {
31513 if (TARGET_BIG_END
31514 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31515 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31516 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31517 && reg_classes_intersect_p (VFP_REGS, rclass))
31518 return false;
31519 return true;
31520 }
31521
31522 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31523 strcpy from constants will be faster. */
31524
31525 static HOST_WIDE_INT
31526 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31527 {
31528 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31529 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31530 return MAX (align, BITS_PER_WORD * factor);
31531 return align;
31532 }
31533
31534 #if CHECKING_P
31535 namespace selftest {
31536
31537 /* Scan the static data tables generated by parsecpu.awk looking for
31538 potential issues with the data. We primarily check for
31539 inconsistencies in the option extensions at present (extensions
31540 that duplicate others but aren't marked as aliases). Furthermore,
31541 for correct canonicalization later options must never be a subset
31542 of an earlier option. Any extension should also only specify other
31543 feature bits and never an architecture bit. The architecture is inferred
31544 from the declaration of the extension. */
31545 static void
31546 arm_test_cpu_arch_data (void)
31547 {
31548 const arch_option *arch;
31549 const cpu_option *cpu;
31550 auto_sbitmap target_isa (isa_num_bits);
31551 auto_sbitmap isa1 (isa_num_bits);
31552 auto_sbitmap isa2 (isa_num_bits);
31553
31554 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31555 {
31556 const cpu_arch_extension *ext1, *ext2;
31557
31558 if (arch->common.extensions == NULL)
31559 continue;
31560
31561 arm_initialize_isa (target_isa, arch->common.isa_bits);
31562
31563 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31564 {
31565 if (ext1->alias)
31566 continue;
31567
31568 arm_initialize_isa (isa1, ext1->isa_bits);
31569 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31570 {
31571 if (ext2->alias || ext1->remove != ext2->remove)
31572 continue;
31573
31574 arm_initialize_isa (isa2, ext2->isa_bits);
31575 /* If the option is a subset of the parent option, it doesn't
31576 add anything and so isn't useful. */
31577 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31578
31579 /* If the extension specifies any architectural bits then
31580 disallow it. Extensions should only specify feature bits. */
31581 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31582 }
31583 }
31584 }
31585
31586 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31587 {
31588 const cpu_arch_extension *ext1, *ext2;
31589
31590 if (cpu->common.extensions == NULL)
31591 continue;
31592
31593 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31594
31595 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31596 {
31597 if (ext1->alias)
31598 continue;
31599
31600 arm_initialize_isa (isa1, ext1->isa_bits);
31601 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31602 {
31603 if (ext2->alias || ext1->remove != ext2->remove)
31604 continue;
31605
31606 arm_initialize_isa (isa2, ext2->isa_bits);
31607 /* If the option is a subset of the parent option, it doesn't
31608 add anything and so isn't useful. */
31609 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31610
31611 /* If the extension specifies any architectural bits then
31612 disallow it. Extensions should only specify feature bits. */
31613 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31614 }
31615 }
31616 }
31617 }
31618
31619 /* Scan the static data tables generated by parsecpu.awk looking for
31620 potential issues with the data. Here we check for consistency between the
31621 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31622 a feature bit that is not defined by any FPU flag. */
31623 static void
31624 arm_test_fpu_data (void)
31625 {
31626 auto_sbitmap isa_all_fpubits (isa_num_bits);
31627 auto_sbitmap fpubits (isa_num_bits);
31628 auto_sbitmap tmpset (isa_num_bits);
31629
31630 static const enum isa_feature fpu_bitlist[]
31631 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31632 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31633
31634 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31635 {
31636 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31637 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31638 bitmap_clear (isa_all_fpubits);
31639 bitmap_copy (isa_all_fpubits, tmpset);
31640 }
31641
31642 if (!bitmap_empty_p (isa_all_fpubits))
31643 {
31644 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31645 " group that are not defined by any FPU.\n"
31646 " Check your arm-cpus.in.\n");
31647 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31648 }
31649 }
31650
31651 static void
31652 arm_run_selftests (void)
31653 {
31654 arm_test_cpu_arch_data ();
31655 arm_test_fpu_data ();
31656 }
31657 } /* Namespace selftest. */
31658
31659 #undef TARGET_RUN_TARGET_SELFTESTS
31660 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31661 #endif /* CHECKING_P */
31662
31663 struct gcc_target targetm = TARGET_INITIALIZER;
31664
31665 #include "gt-arm.h"